module Snap.Internal.Iteratee.BoyerMooreHorspool
( bmhEnumeratee
, MatchInfo(..) )
where
import Control.Monad.State
import qualified Data.ByteString as S
import Data.ByteString (ByteString)
import Data.ByteString.Unsafe as S
import Data.Enumerator hiding (head, filter, last, map)
import qualified Data.Enumerator.List as EL
import Data.Int
import qualified Data.Vector.Unboxed as V
import qualified Data.Vector.Unboxed.Mutable as MV
import Prelude hiding (head, last)
data MatchInfo = Match !ByteString
| NoMatch !ByteString
deriving (Show)
lookahead :: (MonadIO m) =>
Int
-> Iteratee ByteString m (Either ByteString ByteString)
lookahead n = go id n
where
go !dlist !k = do
EL.head >>= maybe
(do
let !ls = S.concat $ dlist []
return $! Left ls)
(\x -> do
let !l = S.length x
let !r = k l
let !d' = dlist . (x:)
if r <= 0
then do
let !ls = S.concat $ d' []
return $! Right ls
else go d' r)
matches :: ByteString
-> Int
-> Int
-> ByteString
-> Int
-> Int
-> Bool
matches !needle !nstart !nend' !haystack !hstart !hend' =
go nend' hend'
where
go !nend !hend =
if nend < nstart || hend < hstart
then True
else let !nc = S.unsafeIndex needle nend
!hc = S.unsafeIndex haystack hend
in if nc /= hc
then False
else go (nend1) (hend1)
bmhEnumeratee :: (MonadIO m) =>
ByteString
-> Step MatchInfo m a
-> Iteratee ByteString m (Step MatchInfo m a)
bmhEnumeratee needle _step = do
cDone _step iter
where
cDone (Continue k) f = f k
cDone step _ = yield step (Chunks [])
iter !k = do
lookahead nlen >>= either (finishAndEOF k . (:[]))
(startSearch k)
finishAndEOF k xs = do
step <- lift $ runIteratee $ k $
Chunks (map NoMatch $ filter (not . S.null) xs)
cDone step (\k' -> lift $ runIteratee $ k' EOF)
startSearch !k !haystack = do
if S.null haystack
then lookahead nlen >>=
either (\s -> finishAndEOF k [s])
(startSearch k)
else go 0
where
!hlen = S.length haystack
go !hidx
| hend >= hlen = crossBound hidx
| otherwise = do
let match = matches needle 0 last haystack hidx hend
if match
then do
let !nomatch = S.take hidx haystack
let !aftermatch = S.drop (hend+1) haystack
step <- if not $ S.null nomatch
then lift $ runIteratee $ k
$ Chunks [NoMatch nomatch]
else return $! Continue k
cDone step $ \k' -> do
step' <- lift $ runIteratee $ k' $ Chunks [Match needle]
cDone step' $ \k'' -> startSearch k'' aftermatch
else do
let c = S.unsafeIndex haystack hend
let !skip = V.unsafeIndex table $ fromEnum c
go (hidx + skip)
where
!hend = hidx + nlen 1
mkCoeff hidx = let !ll = hlen hidx
!nm = nlen ll
in (ll,nm)
crossBound !hidx0 = do
let (!leftLen, needMore) = mkCoeff hidx0
lookahead needMore >>=
either (\s -> finishAndEOF k [haystack, s])
(runNext hidx0 leftLen needMore)
where
runNext !hidx !leftLen !needMore !nextHaystack = do
let match1 = matches needle leftLen last
nextHaystack 0 (needMore1)
let match2 = matches needle 0 (leftLen1)
haystack hidx (hlen1)
if match1 && match2
then do
let !nomatch = S.take hidx haystack
let !aftermatch = S.drop needMore nextHaystack
step <- if not $ S.null nomatch
then lift $ runIteratee $ k $
Chunks [NoMatch nomatch]
else return $! Continue k
cDone step $ \k' -> do
step' <- lift $ runIteratee $ k' $
Chunks [Match needle]
cDone step' $ \k'' ->
startSearch k'' aftermatch
else do
let c = S.unsafeIndex nextHaystack $ needMore1
let p = V.unsafeIndex table (fromEnum c)
if p < leftLen
then do
let !hidx' = hidx+p
let (!leftLen', needMore') = mkCoeff hidx'
let !nextlen = S.length nextHaystack
if (nextlen < needMore')
then do
lookahead (needMore' nextlen) >>=
either (\s -> finishAndEOF k [ haystack
, nextHaystack
, s ])
(\s -> runNext hidx' leftLen' needMore' $
S.append nextHaystack s)
else runNext hidx' leftLen' needMore' nextHaystack
else do
let sidx = p leftLen
let (!crumb, !rest) = S.splitAt sidx nextHaystack
step <- lift $ runIteratee $ k $
Chunks $ map NoMatch $
filter (not . S.null) [haystack, crumb]
cDone step $ flip startSearch rest
!nlen = S.length needle
!last = nlen 1
!table = V.create $ do
t <- MV.replicate 256 nlen
go t
where
go !t = go' 0
where
go' !i | i >= last = return t
| otherwise = do
let c = fromEnum $ S.unsafeIndex needle i
MV.unsafeWrite t c (last i)
go' $! i+1