module General.FMIndex(
    FMIndex,
    create, fromHandle,
    extract,
    Find(..), count, locate
    ) where

import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as LBS
import Data.Binary
import Control.Applicative
import Control.Arrow
import System.IO
import Prelude


data FMIndex a = FMIndex Char [(BS.ByteString, a)] deriving Show

{-
data FMIndex a = FMIndex
    {specialChar :: Char -- Character used to separate words, and which there are associations for
    ,positions :: V.Vector Word32 -- if positions[c] = n, that means there are n substrings that are less than c
    ,associated :: V.Vector a -- values associated with each specialChar
    ,rankAll :: V.Vector Word32 -- ranks, stored every 1024 entries, where rankAll[(n*256)/1024 + c] = rank of c at character n
    ,contents :: BS.ByteString
    }
-}

instance Functor FMIndex where
    fmap f (FMIndex a b) = FMIndex a $ map (second f) b

instance Binary a => Binary (FMIndex a) where
    put (FMIndex a b) = put a >> put b
    get = FMIndex <$> get <*> get

-- assign these indicies to this information
create :: Char -> [(BS.ByteString, a)] -> FMIndex a
create = FMIndex

extract :: FMIndex a -> [(BS.ByteString, a)]
extract (FMIndex _ x) = x

data Find = Exact | Prefix | Suffix | Infix

count :: FMIndex a -> Find -> BS.ByteString -> Int
count idx mode x = length $ locate idx mode x


locate :: FMIndex a -> Find -> BS.ByteString -> [(a, Int)] -- The int is how many characters you are along this string
locate (FMIndex _ xs) mode x = [(i, p) | (a,i) <- xs, Just p <- [op a]]
    where
        op = case mode of
            Exact -> \a -> if x == a then Just 0 else Nothing
            Prefix -> \a -> if x `BS.isPrefixOf` a then Just 0 else Nothing
            Suffix -> \a -> if x `BS.isSuffixOf` a then Just $ BS.length a - BS.length x else Nothing
            Infix -> \a -> let (y,z) = BS.breakSubstring x a in if BS.null z then Nothing else Just $ BS.length y


fromHandle :: Binary a => Handle -> IO (FMIndex a)
fromHandle = fmap decode . LBS.hGetContents