{-# LANGUAGE BangPatterns      #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE Rank2Types        #-}

module Snap.Internal.Iteratee.BoyerMooreHorspool
  ( bmhEnumeratee
  , MatchInfo(..) )
  where

import           Control.Monad.State
import qualified Data.ByteString as S
import           Data.ByteString (ByteString)
import           Data.ByteString.Unsafe as S
import           Data.Enumerator hiding (head, filter, last, map)
import qualified Data.Enumerator.List as EL
import           Data.Int
import qualified Data.Vector.Unboxed as V
import qualified Data.Vector.Unboxed.Mutable  as MV
import           Prelude               hiding (head, last)


--{-# INLINE debug #-}
--debug :: MonadIO m => String -> m ()
--debug s = liftIO $ putStrLn s
--debug _ = return ()

------------------------------------------------------------------------------
data MatchInfo = Match !ByteString
               | NoMatch !ByteString
  deriving (Show)


-- We return strict bytestring because we always expect a chunk to be bigger
-- than the needle
lookahead :: (MonadIO m) =>
             Int
          -> Iteratee ByteString m (Either ByteString ByteString)
lookahead n = go id n
  where
    go !dlist !k = do
        EL.head >>= maybe
                        (do
                            let !ls = S.concat $ dlist []
                            -- debug $ "lookahead " ++ show n
                            --  ++ " failing, returning " ++ show ls

                            return $! Left ls)
                        (\x -> do
                             let !l  = S.length x
                             let !r  = k - l
                             let !d' = dlist . (x:)

                             if r <= 0
                               then do
                                   let !ls = S.concat $ d' []
                                   -- debug $ "lookahead " ++ show n
                                   --  ++ " successfully returning "
                                   --  ++ show ls
                                   return $! Right ls
                               else go d' r)
{-# INLINE lookahead #-}

matches :: ByteString     -- ^ needle
        -> Int            -- ^ needle start
        -> Int            -- ^ needle end (inclusive)
        -> ByteString     -- ^ haystack
        -> Int            -- ^ haystack start
        -> Int            -- ^ haystack end (inclusive)
        -> Bool
matches !needle !nstart !nend' !haystack !hstart !hend' =
    go nend' hend'
  where
    go !nend !hend =
        if nend < nstart || hend < hstart
          then True
          else let !nc = S.unsafeIndex needle nend
                   !hc = S.unsafeIndex haystack hend
               in if nc /= hc
                    then False
                    else go (nend-1) (hend-1)
{-# INLINE matches #-}


bmhEnumeratee :: (MonadIO m) =>
                 ByteString
              -> Step MatchInfo m a
              -> Iteratee ByteString m (Step MatchInfo m a)
bmhEnumeratee needle _step = do
    -- debug $ "boyermoore: needle=" ++ show needle
    cDone _step iter
  where
    {-# INLINE cDone #-}
    cDone (Continue k) f = f k
    cDone step _ = yield step (Chunks [])


    iter !k = {-# SCC "bmh/iter" #-} do
        lookahead nlen >>= either (finishAndEOF k . (:[]))
                                  (startSearch k)

    finishAndEOF k xs = {-# SCC "finishAndEOF" #-} do
        -- debug $ "finishAndEOF, returning NoMatch for " ++ show xs
        step <- lift $ runIteratee $ k $
                Chunks (map NoMatch $ filter (not . S.null) xs)
        cDone step (\k' -> lift $ runIteratee $ k' EOF)


    startSearch !k !haystack = {-# SCC "startSearch" #-} do
        -- debug $ "startsearch: " ++ show haystack
        if S.null haystack
           then lookahead nlen >>=
                either (\s -> finishAndEOF k [s])
                       (startSearch k)
           else go 0
      where
        !hlen = S.length haystack

        go !hidx
          | hend >= hlen = crossBound hidx
          | otherwise = {-# SCC "go" #-} do
              let match = matches needle 0 last haystack hidx hend
              -- debug $ "go " ++ show hidx ++ ", hend=" ++ show hend
              --           ++ ", match was " ++ show match
              if match
                then {-# SCC "go/match" #-} do
                  let !nomatch = S.take hidx haystack
                  let !aftermatch = S.drop (hend+1) haystack

                  step <- if not $ S.null nomatch
                            then lift $ runIteratee $ k
                                      $ Chunks [NoMatch nomatch]
                            else return $! Continue k

                  cDone step $ \k' -> do
                      step' <- lift $ runIteratee $ k' $ Chunks [Match needle]
                      cDone step' $ \k'' -> startSearch k'' aftermatch
                else {-# SCC "go/nomatch" #-} do
                  -- skip ahead
                  let c = S.unsafeIndex haystack hend
                  let !skip = V.unsafeIndex table $ fromEnum c
                  go (hidx + skip)
          where
            !hend = hidx + nlen - 1

        mkCoeff hidx = let !ll = hlen - hidx
                           !nm = nlen - ll
                       in (ll,nm)

        crossBound !hidx0 = {-# SCC "crossBound" #-} do
            let (!leftLen, needMore) = mkCoeff hidx0

            lookahead needMore >>=
             either (\s -> finishAndEOF k [haystack, s])
                    (runNext hidx0 leftLen needMore)
          where
            runNext !hidx !leftLen !needMore !nextHaystack = do
               let match1 = matches needle leftLen last
                                    nextHaystack 0 (needMore-1)
               let match2 = matches needle 0 (leftLen-1)
                                    haystack hidx (hlen-1)

               -- debug $ "crossbound match1=" ++ show match1
               --           ++ " match2=" ++ show match2

               if match1 && match2
                 then {-# SCC "crossBound/match" #-} do
                   let !nomatch = S.take hidx haystack
                   let !aftermatch = S.drop needMore nextHaystack

                   -- FIXME: merge this code w/ above
                   step <- if not $ S.null nomatch
                             then lift $ runIteratee $ k $
                                  Chunks [NoMatch nomatch]
                             else return $! Continue k

                   -- debug $ "matching"
                   cDone step $ \k' -> do
                       step' <- lift $ runIteratee $ k' $
                                Chunks [Match needle]
                       cDone step' $ \k'' ->
                           startSearch k'' aftermatch

                 else {-# SCC "crossBound/nomatch" #-} do
                   let c = S.unsafeIndex nextHaystack $ needMore-1
                   let p = V.unsafeIndex table (fromEnum c)

                   -- debug $ "p was " ++ show p ++ ", ll=" ++ show leftLen
                   if p < leftLen
                     then do
                       let !hidx' = hidx+p
                       let (!leftLen', needMore') = mkCoeff hidx'
                       let !nextlen = S.length nextHaystack
                       if (nextlen < needMore')
                         then do
                           -- this should be impossibly rare
                           lookahead (needMore' - nextlen) >>=
                             either (\s -> finishAndEOF k [ haystack
                                                          , nextHaystack
                                                          , s ])
                                    (\s -> runNext hidx' leftLen' needMore' $
                                           S.append nextHaystack s)
                         else runNext hidx' leftLen' needMore' nextHaystack
                     else do
                       let sidx = p - leftLen
                       let (!crumb, !rest) = S.splitAt sidx nextHaystack
                       step <- lift $ runIteratee $ k $
                               Chunks $ map NoMatch $
                               filter (not . S.null) [haystack, crumb]

                       cDone step $ flip startSearch rest


    !nlen = S.length needle

    !last = nlen - 1

    !table = V.create $ do
        t <- MV.replicate 256 nlen
        go t

      where
        go !t = go' 0
          where
            go' !i | i >= last  = return t
                   | otherwise = do
                let c = fromEnum $ S.unsafeIndex needle i
                MV.unsafeWrite t c (last - i)
                go' $! i+1

{-
testIt :: ByteString -> [ByteString] -> IO [MatchInfo]
testIt needle haystack = do
    consumeStep <- runIteratee EL.consume
    eteeStep    <- runIteratee $ etee consumeStep
    -- iter :: Iteratee ByteString m (Step MatchInfo m [MatchInfo])
    let iter = enumList 1 haystack eteeStep
    finalInnerStep <- run_ iter
    run_ $ returnI finalInnerStep

  where
    etee = bmhEnumeratee needle
-}