diff options
| author | Miguel <m.i@gmx.at> | 2019-03-09 18:12:22 +0100 |
|---|---|---|
| committer | Miguel <m.i@gmx.at> | 2019-03-09 18:12:22 +0100 |
| commit | 116a5a97f600df811f0f14dd2134a72aece88fbf (patch) | |
| tree | c9409213d3a49891470994d24f96066ef12cfd05 /base64 | |
| parent | 7bdaddbb527f6a0e8c8566a98ce04b1114a85db3 (diff) | |
optimized base64.hs quite a lot
Diffstat (limited to 'base64')
| -rw-r--r-- | base64/Makefile | 6 | ||||
| -rw-r--r-- | base64/base64.hs | 190 | ||||
| -rw-r--r-- | base64/base64_1.hs | 18 |
3 files changed, 69 insertions, 145 deletions
diff --git a/base64/Makefile b/base64/Makefile index 629c734..0192413 100644 --- a/base64/Makefile +++ b/base64/Makefile @@ -1,10 +1,12 @@ +base64: base64.hs + #stack ghc -- -O2 base64.hs -rtsopts -prof -fprof-cafs -fprof-auto + stack ghc -- -O2 base64.hs + run: base64 ./base64 10 100 +RTS -N2 #cat /tmp/random.bin | ./base64 +RTS -N6 > /dev/null run-prof: base64prof cat /tmp/random.bin | ./base64prof +RTS -N6 -p -s > /dev/null -base64: base64.hs - stack ghc -- -O2 base64.hs -threaded -o base64 base64prof: base64.hs stack ghc -- -threaded -rtsopts -prof -fprof-auto -fprof-cafs -O2 base64.hs -o base64prof genrandom: diff --git a/base64/base64.hs b/base64/base64.hs index cd84c48..91348fb 100644 --- a/base64/base64.hs +++ b/base64/base64.hs @@ -1,127 +1,67 @@ -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE ViewPatterns #-} -import Data.Maybe (fromJust,isJust) -import Data.List.Split (chunksOf) -import Data.Tuple (swap) -import Data.Tuple.Extra (first,second,dupe) -import System.Environment (getArgs) +import qualified Data.ByteString.Internal as BI import qualified Data.ByteString.Lazy as BL -import qualified Data.ByteString as B -import qualified Data.Map.Strict as M -import Data.Char -import Data.Maybe -import Data.Word -import Data.Array -import Data.Bits ---import Data.ByteString.Base64.Lazy as B64 -import Data.ByteString.Base64 as B64 -import GHC.Int -import Control.Parallel -import Control.Parallel.Strategies - +import qualified Data.ByteString as BS +import Data.Word (Word8, Word32) +import Data.Array.Unboxed(UArray,array) +import Data.Array.Base(unsafeAt) +import Data.Bits(shiftL,shiftR,(.&.)) +import Foreign.Ptr (plusPtr) +import Foreign.Storable (peek, poke) +import Foreign.ForeignPtr (withForeignPtr) +import System.IO.Unsafe (unsafePerformIO) + +-- |Perform base64 encoding of data from standard input main :: IO() -main = BL.getContents >>= mapM_ BL.putStr . parMap rdeepseq (chunkBL 3 (fromNum 6 4.toNum 8)) . chunkB 100000 - -table64 :: Array Int Word8 -table64 = array (0,63) $ zip [0..] $ map (fromIntegral.ord) $ - ['A'..'Z']++['a'..'z']++['0'..'9']++['+','/'] - -toNum :: Int->BL.ByteString->Int -toNum s = BL.foldl' f 0 where f a v = a `shiftL` s + fromIntegral v - -fromNum :: Int->Int->Int->BL.ByteString -fromNum s l x = BL.pack $ map f (reverse [0..l-1]) - where f v = table64 ! (x `shiftR` (v*s) .&. (2^s-1)) - -chunkBL :: Int64->(BL.ByteString->BL.ByteString)->BL.ByteString->BL.ByteString -chunkBL n f b | BL.null b = b - | otherwise = let l = f . BL.take n $ b - r = chunkBL n f . BL.drop n $ b - in BL.append l r - -chunkB :: Int64->BL.ByteString->[BL.ByteString] -chunkB n b | BL.null b = [] - | otherwise = let l = BL.take n $ b - r = chunkB n . BL.drop n $ b - in l:r -{- - let arr=reverse $ BL.foldr f (0,0:[]) b - in arr --BL.unfoldr uf (0,snd arr) - where uf (_,[]) = Nothing - uf (3,x:xs) = Just (table64 ! x,(0,xs)) - uf (c,x:xs) = Just (table64 ! (x.&.63),(c+1,x `shiftR` 6:xs)) - f :: (Int,[Int]) -> Word8 -> (Int,[Int]) - f (_,[]) v = (0,[]) - f (2,x:xs) v = (0,0:(x `shiftL` 8) + fromIntegral v:xs) - f (c,x:xs) v = (c+1,(x `shiftL` 8) + fromIntegral v:xs) - - -- BL.unfoldr uf . (,)3 . snd . BL.foldl' f (0,0:[]) - -- -} -{- --} - - ---main = B.getContents >>= B.putStr . B64.encode --- Example Usage: echo "hello world" | ./base64 | ./base64 -d - --- TODO --- cleanup/simplify --- unit testing (cross check with cli base64, different lengths) --- flag for line widht / check flags / filename on command-line --- (see man base64) --- compare performance . named map? array? - --- arg<-getArgs -{- -enc64 x = B.reverse $ B.unfoldr unf $ (4, (B.foldl' (\a b->a `shiftL` 8 + fromIntegral b) (0::Int) x)) -unf (c,v) = if c==0 then Nothing else let r = v .&. 63 in Just (table64 ! r,(c-1,v`shiftR` 6)) - if length arg == 0 - then putStr . (++"\n") . encode64 . map fromIntegral . B.unpack $ dat - else B.putStr. B.pack . map fromIntegral . decode64 . C.unpack $ dat --} - ---t1 = BL.getContents >>= BL.putStr . BL.fromChunks . map (B.map f) . BL.toChunks - - - ---enc=B.map (\x->table64 ! (x `shiftR` 4)) - - - --- -- reChunkIn :: Int -> [B.ByteString] -> [B.ByteString] --- -- reChunkIn !n = go --- -- where --- -- go [] = [] --- -- go (y : ys) = case B.length y `divMod` n of --- -- (_, 0) -> y : go ys --- -- (d, _) -> case B.splitAt (d * n) y of --- -- (prefix, suffix) -> prefix : fixup suffix ys --- -- fixup acc [] = [acc] --- -- fixup acc (z : zs) = case B.splitAt (n - B.length acc) z of --- -- (prefix, suffix) -> --- -- let acc' = acc `B.append` prefix --- -- in if B.length acc' == n --- -- then let zs' = if B.null suffix --- -- then zs --- -- else suffix : zs --- -- in acc' : go zs' --- -- else -- suffix must be null --- -- fixup acc' zs --- ---dec64 k = M.lookup k mp where mp = M.fromList $ map swap table64 - -{- -encode64 :: [Int] -> [Char] -encode64 = map (fromJust.enc64.flip mod 64) -- concat . map (pad . first enc . second length . dupe) . chunksOf 3 - where enc = map (fromJust . enc64) . sumC - sumC = map fst . reverse . take 4 . drop 1 .iterate to64 . (,) 0 . sum . map (uncurry (*)) . zip mult - mult = map (256^) [2,1,0] - to64 (r,v) = let r' = v `mod` 64 in (r',(v-r')`div`64) - pad (v,l) =take 4 $ take (1+l) v ++ "===" - -decode64 :: [Char]->[Int] -decode64 = map fst . concat . map (rem . first (reverse . take 3 . drop 1 . iterate to256 . (,) 0 . dec. map (fromJust) . filter (isJust) . map dec64).second (length.filter(=='=')). dupe) . chunksOf 4 . filter (/='\n') - where dec = sum . map (uncurry (*)) . zip (map (64^) [3,2..]) - to256 (r,v) = let r' = v `mod` 256 in (r',(v-r')`div`256) - rem (v,l) = take (3-l) v --} +main = BL.getContents>>=BL.putStr.BL.fromChunks.map encode64.reChunk.BL.toChunks + +-- |Base64 index table +tab64 :: UArray Word32 Word8 +tab64 = array (0,63) $ zip [0..] $ map (BI.c2w) $ + ['A'..'Z']++['a'..'z']++['0'..'9']++['+','/'] + +-- |Encodes 3 octets into 4 sextets +enc64 :: (Word8,Word8,Word8)->(Word8,Word8,Word8,Word8) +enc64 (b1,b2,b3) = (t 3,t 2,t 1,t 0) + where t x = tab64 `unsafeAt` (n `shiftR` (x*6) .&. 63) + f b n = fromIntegral b `shiftL` n + n = f b1 16 + f b2 8 + f b3 0 + +-- |Transforms list of ByteStrings to a new list of ByteStrings with +-- lengths guaranteed to be multiples of 3 (excepting the last one) +-- Assumes that all input ByteStrings (excepting the last one) have +-- at least a length of 3. +reChunk :: [BS.ByteString] -> [BS.ByteString] +reChunk (y:[]) = [y] +reChunk (y:z:zs) = let c = BS.length y `mod` 3 + in BS.append y (BS.take 3 z):(reChunk $ (BS.drop 3 z):zs) + +-- |Wraps Base64 enode in encode64io in unsafePerformIO to use in +-- pure code, Use this only if you trust my 'encode64io' code is free +-- of side effects and indepedent of the environment. Good Luck! +encode64 :: BS.ByteString -> BS.ByteString +encode64 = unsafePerformIO . encode64io + +-- |Base64 encode a strict ByteString using foreign pointers within the +-- IO monad. +encode64io :: BS.ByteString -> IO BS.ByteString +encode64io (BI.PS ptr offs len) = do + bs <- BI.mallocByteString ln + withForeignPtr ptr $ \fp -> do + withForeignPtr bs $ \fbs -> do + let end = fp `plusPtr` (len+offs) + in go (fp`plusPtr`offs) fbs end bs + where ln=((len + 2) `div` 3) * 4 + go fp fbs end bs + | fp >= end = return $ BI.fromForeignPtr bs 0 ln + | fp `plusPtr`1 == end = cnv 1 + | fp `plusPtr`2 == end = cnv 2 + | otherwise = cnv 3 + where pok nn n v = if n>1 && nn<n + then poke (fbs`plusPtr`n) (61::Word8) + else poke (fbs`plusPtr`n) v + pek nn n = if nn<n then return 0 else peek (fp`plusPtr`n) + cnv :: Int -> IO BS.ByteString + cnv n = pek n 0>>= \b1->pek n 1>>= \b2->pek n 2>>= \b3-> + let (e1,e2,e3,e4) = enc64(b1,b2,b3) in + pok n 0 e1>>pok n 1 e2>>pok n 2 e3>>pok n 3 e4 >> + go (fp `plusPtr` 3) (fbs`plusPtr`4) end bs diff --git a/base64/base64_1.hs b/base64/base64_1.hs index 28655c1..6ab7344 100644 --- a/base64/base64_1.hs +++ b/base64/base64_1.hs @@ -1,21 +1,3 @@ --- --- Miguel's Naive Base64 Encoder --- --- Coded on a winter afterfnoon on 19th Feb 2018 A.D. --- to fully understand base64 encoding and play with --- haskell, which is always an indisputable pleasure. --- --- The following lines were written in full awareness --- that 'libraries' for this purpose, which perform way --- better, are in existence. --- --- Coded in big anger due to nick's stories about saving his binary --- format, encrypted passwords in an ascii config file, featuring --- strange letters and characters. --- --- Example Usage: echo "just testing" | stack runghc base64.hs | stack runghc base64 -- -d --- (You can cross check with base64 / base64 -d) - import System.Environment import qualified Data.List.Split as T import qualified Data.List as L |
