module Config.LexerUtils where
import Data.Char (GeneralCategory(..), generalCategory, digitToInt,
isAscii, isSpace, readLitChar, ord)
import Data.Monoid ((<>))
import Data.Text (Text)
import Data.Text.Lazy.Builder (Builder)
import Data.Word (Word8)
import Numeric (readInt)
import qualified Data.Text as Text
import qualified Data.Text.Lazy as LText
import qualified Data.Text.Lazy.Builder as Builder
#if !MIN_VERSION_base(4,8,0)
import Data.Monoid (mempty)
#endif
import Config.Tokens
type AlexInput = Located Text
alexStartPos :: Position
alexStartPos = Position { posIndex = 0, posLine = 1, posColumn = 1 }
alexGetByte :: AlexInput -> Maybe (Word8,AlexInput)
alexGetByte (Located p cs)
= do (c,!cs') <- Text.uncons cs
let !p' = alexMove p c
!b = byteForChar c
return (b, Located p' cs')
alexMove :: Position -> Char -> Position
alexMove (Position ix line column) c =
case c of
'\t' -> Position (ix + 1) line (((column + 7) `div` 8) * 8 + 1)
'\n' -> Position (ix + 1) (line + 1) 1
_ -> Position (ix + 1) line (column + 1)
data LexerMode
= InNormal
| InComment !Position !LexerMode
| InCommentString !Position !LexerMode
| InString !Position !Builder
type Action = Located Text -> LexerMode -> (LexerMode, Maybe (Located Token))
token :: (Text -> Token) -> Action
token f match st = (st, Just (fmap f match))
modeChange :: (Located Text -> LexerMode -> LexerMode) -> Action
modeChange f match st = (f match st, Nothing)
startComment :: Action
startComment = modeChange (InComment . locPosition)
endComment :: Action
endComment = modeChange $ \_ (InComment _ st) -> st
startCommentString :: Action
startCommentString = modeChange (InCommentString . locPosition)
endCommentString :: Action
endCommentString = modeChange $ \_ (InCommentString _ st) -> st
startString :: Action
startString = modeChange $ \match _ -> InString (locPosition match) mempty
endString :: Action
endString _ = \(InString posn builder) ->
let !t = getStringLit builder
in (InNormal, Just (Located posn (String t)))
getStringLit :: Builder -> Text
getStringLit = LText.toStrict . Builder.toLazyText
addString :: Action
addString = modeChange $ \match (InString posn builder) ->
InString posn (builder <> Builder.fromText (locThing match))
addCharLit :: Action
addCharLit = modeChange $ \match (InString posn builder) ->
case readLitChar (Text.unpack (locThing match)) of
[(c,"")] -> InString posn (builder <> Builder.singleton c)
_ -> error "addCharLit: Lexer failure"
badEscape :: Action
badEscape = token $ \str -> ErrorEscape str
untermString :: Action
untermString _ = \(InString posn builder) ->
(InNormal, Just (Located posn (ErrorUntermString (getStringLit builder))))
number ::
Int ->
Int ->
Text ->
Token
number prefixLen base str =
case readInt (fromIntegral base) (const True) digitToInt str2 of
[(n,"")] -> Number base (s*n)
_ -> error "number: Lexer failure"
where
str2 = drop prefixLen str1
(s,str1) = case Text.unpack str of
'-':rest -> (1, rest)
rest -> ( 1, rest)
section :: Text -> Token
section = Section . Text.dropWhileEnd isSpace . Text.init
byteForChar :: Char -> Word8
byteForChar c
| c <= '\6' = non_graphic
| isAscii c = fromIntegral (ord c)
| otherwise = case generalCategory c of
LowercaseLetter -> lower
OtherLetter -> lower
UppercaseLetter -> upper
TitlecaseLetter -> upper
DecimalNumber -> digit
OtherNumber -> digit
ConnectorPunctuation -> symbol
DashPunctuation -> symbol
OtherPunctuation -> symbol
MathSymbol -> symbol
CurrencySymbol -> symbol
ModifierSymbol -> symbol
OtherSymbol -> symbol
Space -> space
ModifierLetter -> other
NonSpacingMark -> other
SpacingCombiningMark -> other
EnclosingMark -> other
LetterNumber -> other
OpenPunctuation -> other
ClosePunctuation -> other
InitialQuote -> other
FinalQuote -> other
_ -> non_graphic
where
non_graphic = 0
upper = 1
lower = 2
digit = 3
symbol = 4
space = 5
other = 6