{-# OPTIONS  #-}
-----------------------------------------------------------------------------
-- |
-- Module      : Language.Python.Common.LexerUtils 
-- Copyright   : (c) 2009 Bernie Pope 
-- License     : BSD-style
-- Maintainer  : bjpop@csse.unimelb.edu.au
-- Stability   : experimental
-- Portability : ghc
--
-- Various utilities to support the Python lexer. 
-----------------------------------------------------------------------------

module Language.Python.Common.LexerUtils where

import Control.Monad (liftM)
import Control.Monad.Error.Class (throwError)
import Data.List (foldl')
import Data.Map as Map hiding (null, map, foldl')
import Data.Word (Word8)
import Data.Char (ord)
import Numeric (readHex, readOct)
import Language.Python.Common.Token as Token 
import Language.Python.Common.ParserMonad hiding (location)
import Language.Python.Common.SrcLocation 
import Codec.Binary.UTF8.String as UTF8 (encode)

type Byte = Word8

-- Beginning of. BOF = beginning of file, BOL = beginning of line
data BO = BOF | BOL

-- Functions for building tokens 

type StartCode = Int
type Action = SrcSpan -> Int -> String -> P Token 

lineJoin :: Action
lineJoin span _len _str = 
   return $ LineJoinToken $ spanStartPoint span

endOfLine :: P Token -> Action
endOfLine lexToken span _len _str = do
   setLastEOL $ spanStartPoint span
   lexToken

bolEndOfLine :: P Token -> Int -> Action 
bolEndOfLine lexToken bol span len inp = do
   pushStartCode bol 
   endOfLine lexToken span len inp

dedentation :: P Token -> Action
dedentation lexToken span _len _str = do
   topIndent <- getIndent
   -- case compare (endCol span) topIndent of
   case compare (startCol span) topIndent of
      EQ -> do popStartCode
               lexToken 
      LT -> do popIndent
               return dedentToken 
      GT -> spanError span "indentation error"

indentation :: P Token -> Int -> BO -> Action 
-- Check if we are at the EOF. If yes, we may need to generate a newline,
-- in case we came here from BOL (but not BOF).
indentation lexToken _dedentCode bo _loc _len [] = do
   popStartCode
   case bo of
      BOF -> lexToken
      BOL -> newlineToken
indentation lexToken dedentCode bo span _len _str = do
   popStartCode
   parenDepth <- getParenStackDepth
   if parenDepth > 0
      then lexToken
      else do 
         topIndent <- getIndent
         -- case compare (endCol span) topIndent of
         case compare (startCol span) topIndent of
            EQ -> case bo of
                     BOF -> lexToken
                     BOL -> newlineToken   
            LT -> do pushStartCode dedentCode 
                     newlineToken 
            -- GT -> do pushIndent (endCol span)
            GT -> do pushIndent (startCol span)
                     return indentToken
   where
   indentToken = IndentToken span

symbolToken :: (SrcSpan -> Token) -> Action 
symbolToken mkToken location _ _ = return (mkToken location)

token :: (SrcSpan -> String -> a -> Token) -> (String -> a) -> Action 
token mkToken read location len str 
   = return $ mkToken location literal (read literal) 
   where
   literal = take len str

-- special tokens for the end of file and end of line
endOfFileToken :: Token
endOfFileToken = EOFToken SpanEmpty
dedentToken = DedentToken SpanEmpty 

newlineToken :: P Token
newlineToken = do
   loc <- getLastEOL
   return $ NewlineToken loc

-- Test if we are at the end of the line or file
atEOLorEOF :: a -> AlexInput -> Int -> AlexInput -> Bool
atEOLorEOF _user _inputBeforeToken _tokenLength (_loc, _bs, inputAfterToken) 
   = null inputAfterToken || nextChar == '\n' || nextChar == '\r'
   where
   nextChar = head inputAfterToken 

notEOF :: a -> AlexInput -> Int -> AlexInput -> Bool
notEOF _user _inputBeforeToken _tokenLength (_loc, _bs, inputAfterToken) 
   = not (null inputAfterToken)

readBinary :: String -> Integer
readBinary 
   = toBinary . drop 2 
   where
   toBinary = foldl' acc 0
   acc b '0' = 2 * b
   acc b '1' = 2 * b + 1

readFloat :: String -> Double
readFloat str@('.':cs) = read ('0':readFloatRest str)
readFloat str = read (readFloatRest str)
readFloatRest :: String -> String
readFloatRest [] = []
readFloatRest ['.'] = ".0"
readFloatRest (c:cs) = c : readFloatRest cs

mkString :: (SrcSpan -> String -> Token) -> Action
mkString toToken loc len str = do
   return $ toToken loc (take len str)

stringToken :: SrcSpan -> String -> Token
stringToken = StringToken

rawStringToken :: SrcSpan -> String -> Token
rawStringToken = StringToken

byteStringToken :: SrcSpan -> String -> Token
byteStringToken = ByteStringToken

unicodeStringToken :: SrcSpan -> String -> Token
unicodeStringToken = UnicodeStringToken

rawByteStringToken :: SrcSpan -> String -> Token
rawByteStringToken = ByteStringToken

openParen :: (SrcSpan -> Token) -> Action
openParen mkToken loc _len _str = do
   let token = mkToken loc
   pushParen token 
   return token 

closeParen :: (SrcSpan -> Token) -> Action
closeParen mkToken loc _len _str = do
  let token = mkToken loc
  topParen <- getParen
  case topParen of
     Nothing -> spanError loc err1 
     Just open -> if matchParen open token 
                    then popParen >> return token
                    else spanError loc err2
   where
   -- XXX fix these error messages
   err1 = "Lexical error ! unmatched closing paren"
   err2 = "Lexical error ! unmatched closing paren"

matchParen :: Token -> Token -> Bool
matchParen (LeftRoundBracketToken {}) (RightRoundBracketToken {}) = True
matchParen (LeftBraceToken {}) (RightBraceToken {}) = True
matchParen (LeftSquareBracketToken {}) (RightSquareBracketToken {}) = True
matchParen _ _ = False

-- -----------------------------------------------------------------------------
-- Functionality required by Alex 

type AlexInput = (SrcLocation,  -- current src location
                 [Byte],        -- byte buffer for next character
                 String)        -- input string

alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar _ = error "alexInputPrevChar not used"

-- byte buffer should be empty here
alexGetChar :: AlexInput -> Maybe (Char, AlexInput)
alexGetChar (loc, [], input) 
   | null input  = Nothing
   | otherwise = seq nextLoc (Just (nextChar, (nextLoc, [], rest)))
   where
   nextChar = head input
   rest = tail input 
   nextLoc = moveChar nextChar loc
alexGetChar (loc, _:_, _) = error "alexGetChar called with non-empty byte buffer"

-- mapFst :: (a -> b) -> (a, c) -> (b, c)
-- mapFst f (a, c) = (f a, c)

alexGetByte :: AlexInput -> Maybe (Byte, AlexInput)
-- alexGetByte = fmap (mapFst (fromIntegral . ord)) . alexGetChar
alexGetByte (loc, b:bs, input) = Just (b, (loc, bs, input))
alexGetByte (loc, [], []) = Nothing
alexGetByte (loc, [], nextChar:rest) =
   seq nextLoc (Just (byte, (nextLoc, restBytes, rest)))
   where
   nextLoc = moveChar nextChar loc
   byte:restBytes = UTF8.encode [nextChar]

moveChar :: Char -> SrcLocation -> SrcLocation 
moveChar '\n' = incLine 1 
moveChar '\t' = incTab 
moveChar '\r' = id 
moveChar _    = incColumn 1 

lexicalError :: P a
lexicalError = do
  location <- getLocation
  c <- liftM head getInput
  throwError $ UnexpectedChar c location

readOctNoO :: String -> Integer
readOctNoO (zero:rest) = read (zero:'O':rest)