import Neuronet
import System.Random(randomRIO)
import Numeric.LinearAlgebra
import Data.List
import Data.Foldable(foldlM)
import Data.List.Split
import Data.Tuple.Extra
import System.Random.Shuffle
import qualified Data.ByteString as BS
import System.IO
import Control.DeepSeq

-- | A single data sample: network input paired with the expected output.
type Sample = (Vector Double, Vector Double)

-- | A list of samples.
type Samples = [Sample]

-- | Split the samples into randomly shuffled mini-batches of exactly @n@
-- elements each.  A trailing partial batch (fewer than @n@ samples) is
-- discarded, so every returned batch has uniform size.
batches :: Int -> Samples -> IO [Samples]
batches n xs = do
  shuffled <- shuffleM xs
  return . filter ((== n) . length) $ chunksOf n shuffled

-- | Produce the given number of epochs, each an independent random
-- batching of the samples into mini-batches of the given size.
epochs :: Int -> Int -> Samples -> IO [[Samples]]
epochs e n s
  | e <= 0    = return []   -- also terminates on negative epoch counts
  | otherwise = (:) <$> batches n s <*> epochs (e - 1) n s

-- | Train for multiple epochs, optionally testing after each.
--
-- The first argument controls whether the test/report function is run
-- after every epoch (previously this flag was accepted but ignored and
-- testing always ran, contradicting the stated contract).
training :: Bool -> (Neuronet -> String) -> Double -> Neuronet -> [[Samples]] -> IO Neuronet
training tst tstf r net s = foldlM step net (zip s [1 ..])
  where
    step nt (batchList, i) = do
      putStr $ "Epoch " ++ show i ++ "...."
      hFlush stdout  -- make progress visible even under line buffering
      let n = foldl' trainBatch nt batchList
      if tst then putStrLn (tstf n) else putStrLn ""
      return n
    -- One gradient step on a mini-batch; unzip once instead of twice.
    trainBatch nt batch = let (xs, ys) = unzip batch in train r nt xs ys

-- | Test with the given samples and return the number of correct answers,
-- where @f expected actual@ decides whether an answer counts as correct.
testing :: (Vector Double -> Vector Double -> Bool) -> Neuronet -> Samples -> Int
testing f net s = length . filter id $ map (\(x, y) -> f y (asknet net x)) s

-- | Create 'Samples' given two MNIST files for images and labels.
--
-- Image file layout: 16-byte header, then 28*28 pixel bytes per image;
-- pixels are scaled into [0,1].  Label file layout: 8-byte header, then
-- one byte per label; labels become one-hot vectors of length 10.
--
-- Uses strict 'BS.readFile' instead of the previous explicit
-- 'openFile'/'hGetContents' pair, so handles are closed promptly and
-- read errors surface immediately.
read_samples :: FilePath -> FilePath -> IO Samples
read_samples f1 f2 = do
  imgBytes <- BS.readFile f1
  lblBytes <- BS.readFile f2
  let xs = map (fromList . map ((/ 255) . fromIntegral))
         . chunksOf (28 * 28) . BS.unpack . BS.drop 16 $ imgBytes
      ys = map (fromList . oneHot . fromIntegral)
         . BS.unpack . BS.drop 8 $ lblBytes
  return $ zip xs ys
  where
    zeros = replicate 9 0
    -- One-hot encoding of a digit in 0..9.
    oneHot x = take x zeros ++ [1] ++ drop x zeros

-- finally some learning and testing with MNIST
-- MNIST files from http://yann.lecun.com/exdb/mnist/

-- | Load the training set, force it fully, and print the sample count.
main :: IO ()
main = do
  s <- read_samples "train-images-idx3-ubyte" "train-labels-idx1-ubyte"
  print $ s `deepseq` length s

-- | Full run: train a fresh network on MNIST and report test accuracy
-- after every epoch.
main2 :: IO ()
main2 = do
  let ep  = 10                 -- number of epochs
      mbs = 10                 -- mini-batch size
      lr  = 3                  -- learning rate
      lay = [28 * 28, 30, 10]  -- number of neurons by layer
      cap = 999999             -- cap number of training samples
  nt <- newnet lay
  smpl_train <- take cap <$> read_samples "train-images-idx3-ubyte"
                                          "train-labels-idx1-ubyte"
  smpl_test  <- read_samples "t10k-images-idx3-ubyte" "t10k-labels-idx1-ubyte"
  _ <- epochs ep mbs smpl_train
         >>= training True (tst smpl_test) (lr / fromIntegral mbs) nt
  putStrLn "end"
  where
    -- A prediction is correct when expected and actual share the argmax.
    chk y1 y2 = val y1 == val y2
    val x = snd . last . sort $ zip (toList x) [0 .. 9]
    -- NOTE(review): the "/ 100" turns a correct-count into a percentage
    -- only because the MNIST test set has exactly 10000 samples.
    tst smpl n = "... \ESC[32m\STX"
              ++ show (fromIntegral (testing chk n smpl) / 100)
              ++ "\ESC[m\STX%"