ref: 660cf9275b8374f89c8f5db51a070070f28c1afc
dir: /lib/Text/Read/Lex.hs/
{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE NoImplicitPrelude #-}
-----------------------------------------------------------------------------
-- |
-- Module : GHC.Internal.Text.Read.Lex
-- Copyright : (c) The University of Glasgow 2002
-- License : BSD-style (see the file libraries/base/LICENSE)
--
-- Maintainer : libraries@haskell.org
-- Stability : provisional
-- Portability : non-portable (uses Text.ParserCombinators.ReadP)
--
-- The cut-down Haskell lexer, used by GHC.Internal.Text.Read
--
-----------------------------------------------------------------------------
module Text.Read.Lex
-- lexing types
( Lexeme(..), Number
, numberToInteger, numberToFixed, numberToRational, numberToRangedRational
-- lexer
, lex, expect
, hsLex
, lexChar
, readBinP
, readIntP
, readOctP
, readDecP
, readHexP
, isSymbolChar
)
where
import Prelude()
import Control.Error
import Control.Monad
import Data.Char
import Data.Bool
import Data.Bounded
import Data.Eq
import Data.Function
import Data.Int
import Data.Integer
import Data.Integral
import Data.List
import Data.Maybe
import Data.Num
import Data.Ord
import Data.Ratio
import Data.Tuple
import Text.Show
import Text.ParserCombinators.ReadP
{-
import Control.Monad
import Data.Enum
import Data.List
import Data.Maybe
import Data.Real
-}
-- -----------------------------------------------------------------------------
-- Lexing types
-- ^ Haskell lexemes.
data Lexeme
= Char Char -- ^ Character literal
| String String -- ^ String literal, with escapes interpreted
| Punc String -- ^ Punctuation or reserved symbol, e.g. @(@, @::@
| Ident String -- ^ Haskell identifier, e.g. @foo@, @Baz@
| Symbol String -- ^ Haskell symbol, e.g. @>>@, @:%@
| Number Number -- ^ @since base-4.6.0.0
| EOF
deriving ( Eq -- ^ @since base-2.01
, Show -- ^ @since base-2.01
)
-- | @since base-4.6.0.0
data Number = MkNumber Int -- Base
Digits -- Integral part
| MkDecimal Digits -- Integral part
(Maybe Digits) -- Fractional part
(Maybe Integer) -- Exponent
deriving ( Eq -- ^ @since base-4.6.0.0
, Show -- ^ @since base-4.6.0.0
)
-- | @since base-4.5.1.0
numberToInteger :: Number -> Maybe Integer
numberToInteger (MkNumber base iPart) = Just (val (fromIntegral base) iPart)
numberToInteger (MkDecimal iPart Nothing Nothing) = Just (val 10 iPart)
numberToInteger _ = Nothing
-- | @since base-4.7.0.0
numberToFixed :: Integer -> Number -> Maybe (Integer, Integer)
numberToFixed _ (MkNumber base iPart) = Just (val (fromIntegral base) iPart, 0)
numberToFixed _ (MkDecimal iPart Nothing Nothing) = Just (val 10 iPart, 0)
numberToFixed p (MkDecimal iPart (Just fPart) Nothing)
= let i = val 10 iPart
f = val 10 (integerTake p (fPart ++ repeat 0))
-- Sigh, we really want genericTake, but that's above us in
-- the hierarchy, so we define our own version here (actually
-- specialised to Integer)
integerTake :: Integer -> [a] -> [a]
integerTake n _ | n <= 0 = []
integerTake _ [] = []
integerTake n (x:xs) = x : integerTake (n-1) xs
in Just (i, f)
numberToFixed _ _ = Nothing
-- This takes a floatRange, and if the Rational would be outside of
-- the floatRange then it may return Nothing. Not that it will not
-- /necessarily/ return Nothing, but it is good enough to fix the
-- space problems in #5688
-- Ways this is conservative:
-- * the floatRange is in base 2, but we pretend it is in base 10
-- * we pad the floatRange a bit, just in case it is very small
-- and we would otherwise hit an edge case
-- * We only worry about numbers that have an exponent. If they don't
-- have an exponent then the Rational won't be much larger than the
-- Number, so there is no problem
-- | @since base-4.5.1.0
numberToRangedRational :: (Int, Int) -> Number
-> Maybe Rational -- Nothing = Inf
numberToRangedRational (neg, pos) n@(MkDecimal iPart mFPart (Just exp))
-- Calculate amount to increase/decrease the exponent, based on (non
-- leading zero) places in the iPart, or leading zeros in the fPart.
-- If iPart and fPart are all zeros, return Nothing.
= let mFirstDigit = case dropWhile (0 ==) iPart of
iPart'@(_ : _) -> Just (length iPart')
[] -> case mFPart of
Nothing -> Nothing
Just fPart ->
case span (0 ==) fPart of
(_, []) -> Nothing
(zeroes, _) ->
Just (negate (length zeroes))
in case mFirstDigit of
Nothing -> Just 0
Just firstDigit ->
-- compare exp to bounds as Integer to avoid over/underflow
let firstDigit' = toInteger firstDigit + exp
in if firstDigit' > toInteger (pos + 3)
then Nothing
else if firstDigit' < toInteger (neg - 3)
then Just 0
else Just (numberToRational n)
numberToRangedRational _ n = Just (numberToRational n)
-- | @since base-4.6.0.0
numberToRational :: Number -> Rational
numberToRational (MkNumber base iPart) = val (fromIntegral base) iPart % 1
numberToRational (MkDecimal iPart mFPart mExp)
= let i = val 10 iPart
in case (mFPart, mExp) of
(Nothing, Nothing) -> i % 1
(Nothing, Just exp)
| exp >= 0 -> (i * (10 ^ exp)) % 1
| otherwise -> i % (10 ^ (- exp))
(Just fPart, Nothing) -> fracExp 0 i fPart
(Just fPart, Just exp) -> fracExp exp i fPart
-- fracExp is a bit more efficient in calculating the Rational.
-- Instead of calculating the fractional part alone, then
-- adding the integral part and finally multiplying with
-- 10 ^ exp if an exponent was given, do it all at once.
-- -----------------------------------------------------------------------------
-- Lexing
lex :: ReadP Lexeme
lex = skipSpaces >> lexToken
-- | @since base-4.7.0.0
expect :: Lexeme -> ReadP ()
expect lexeme = do { skipSpaces
; thing <- lexToken
; if thing == lexeme then return () else pfail }
hsLex :: ReadP String
-- ^ Haskell lexer: returns the lexed string, rather than the lexeme
hsLex = do skipSpaces
(s,_) <- gather lexToken
return s
lexToken :: ReadP Lexeme
lexToken = lexEOF +++
lexLitChar +++
lexString +++
lexPunc +++
lexSymbol +++
lexId +++
lexNumber
-- ----------------------------------------------------------------------
-- End of file
lexEOF :: ReadP Lexeme
lexEOF = do s <- look
guard (null s)
return EOF
-- ---------------------------------------------------------------------------
-- Single character lexemes
lexPunc :: ReadP Lexeme
lexPunc =
do c <- satisfy isPuncChar
return (Punc [c])
-- | The @special@ character class as defined in the Haskell Report.
isPuncChar :: Char -> Bool
isPuncChar c = c `elem` ",;()[]{}`"
-- ----------------------------------------------------------------------
-- Symbols
lexSymbol :: ReadP Lexeme
lexSymbol =
do s <- munch1 isSymbolChar
if s `elem` reserved_ops then
return (Punc s) -- Reserved-ops count as punctuation
else
return (Symbol s)
where
reserved_ops = ["..", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"] :: [String]
{-
isSymbolChar :: Char -> Bool
isSymbolChar c = not (isPuncChar c) && case generalCategory c of
MathSymbol -> True
CurrencySymbol -> True
ModifierSymbol -> True
OtherSymbol -> True
DashPunctuation -> True
OtherPunctuation -> not (c `elem` "'\"")
ConnectorPunctuation -> c /= '_'
_ -> False
-}
isSymbolChar :: Char -> Bool
isSymbolChar c = c `elem` ("!@#$%&?+./<=>?\\^|:-~"::String)
-- ----------------------------------------------------------------------
-- identifiers
lexId :: ReadP Lexeme
lexId = do c <- satisfy isIdsChar
s <- munch isIdfChar
return (Ident (c:s))
where
-- Identifiers can start with a '_'
isIdsChar c = isAlpha c || c == '_'
isIdfChar c = isAlphaNum c || c `elem` "_'"
-- ---------------------------------------------------------------------------
-- Lexing character literals
lexLitChar :: ReadP Lexeme
lexLitChar =
do _ <- char '\''
(c,esc) <- lexCharE
guard (esc || c /= '\'') -- Eliminate '' possibility
_ <- char '\''
return (Char c)
lexChar :: ReadP Char
lexChar = do { (c,_) <- lexCharE; consumeEmpties; return c }
where
-- Consumes the string "\&" repeatedly and greedily (will only produce one match)
consumeEmpties :: ReadP ()
consumeEmpties = do
rest <- look
case rest of
('\\':'&':_) -> string "\\&" >> consumeEmpties
_ -> return ()
lexCharE :: ReadP (Char, Bool) -- "escaped or not"?
lexCharE =
do c1 <- get
if c1 == '\\'
then do c2 <- lexEsc; return (c2, True)
else return (c1, False)
where
lexEsc =
lexEscChar
+++ lexNumeric
+++ lexCntrlChar
+++ lexAscii
lexEscChar =
do c <- get
case c of
'a' -> return '\a'
'b' -> return '\b'
'f' -> return '\f'
'n' -> return '\n'
'r' -> return '\r'
't' -> return '\t'
'v' -> return '\v'
'\\' -> return '\\'
'\"' -> return '\"'
'\'' -> return '\''
_ -> pfail
lexNumeric =
do base <- lexBaseChar <++ return 10
n <- lexInteger base
guard (n <= toInteger (ord maxBound))
return (chr (fromInteger n))
lexCntrlChar =
do _ <- char '^'
c <- get
case c of
'@' -> return '\^@'
'A' -> return '\^A'
'B' -> return '\^B'
'C' -> return '\^C'
'D' -> return '\^D'
'E' -> return '\^E'
'F' -> return '\^F'
'G' -> return '\^G'
'H' -> return '\^H'
'I' -> return '\^I'
'J' -> return '\^J'
'K' -> return '\^K'
'L' -> return '\^L'
'M' -> return '\^M'
'N' -> return '\^N'
'O' -> return '\^O'
'P' -> return '\^P'
'Q' -> return '\^Q'
'R' -> return '\^R'
'S' -> return '\^S'
'T' -> return '\^T'
'U' -> return '\^U'
'V' -> return '\^V'
'W' -> return '\^W'
'X' -> return '\^X'
'Y' -> return '\^Y'
'Z' -> return '\^Z'
'[' -> return '\^['
'\\' -> return '\^\'
']' -> return '\^]'
'^' -> return '\^^'
'_' -> return '\^_'
_ -> pfail
lexAscii =
choice
[ (string "SOH" >> return '\SOH') <++
(string "SO" >> return '\SO')
-- \SO and \SOH need maximal-munch treatment
-- See the Haskell report Sect 2.6
, string "NUL" >> return '\NUL'
, string "STX" >> return '\STX'
, string "ETX" >> return '\ETX'
, string "EOT" >> return '\EOT'
, string "ENQ" >> return '\ENQ'
, string "ACK" >> return '\ACK'
, string "BEL" >> return '\BEL'
, string "BS" >> return '\BS'
, string "HT" >> return '\HT'
, string "LF" >> return '\LF'
, string "VT" >> return '\VT'
, string "FF" >> return '\FF'
, string "CR" >> return '\CR'
, string "SI" >> return '\SI'
, string "DLE" >> return '\DLE'
, string "DC1" >> return '\DC1'
, string "DC2" >> return '\DC2'
, string "DC3" >> return '\DC3'
, string "DC4" >> return '\DC4'
, string "NAK" >> return '\NAK'
, string "SYN" >> return '\SYN'
, string "ETB" >> return '\ETB'
, string "CAN" >> return '\CAN'
, string "EM" >> return '\EM'
, string "SUB" >> return '\SUB'
, string "ESC" >> return '\ESC'
, string "FS" >> return '\FS'
, string "GS" >> return '\GS'
, string "RS" >> return '\RS'
, string "US" >> return '\US'
, string "SP" >> return '\SP'
, string "DEL" >> return '\DEL'
]
-- ---------------------------------------------------------------------------
-- string literal
lexString :: ReadP Lexeme
lexString =
do _ <- char '"'
body id
where
body f =
do (c,esc) <- lexStrItem
if c /= '"' || esc
then body (f.(c:))
else let s = f "" in
return (String s)
lexStrItem = (lexEmpty >> lexStrItem)
+++ lexCharE
lexEmpty =
do _ <- char '\\'
c <- get
case c of
'&' -> return ()
_ | isSpace c -> do skipSpaces; _ <- char '\\'; return ()
_ -> pfail
-- ---------------------------------------------------------------------------
-- Lexing numbers
type Base = Int
type Digits = [Int]
lexNumber :: ReadP Lexeme
lexNumber
= lexHexOct <++ -- First try for hex or octal 0x, 0o etc
-- If that fails, try for a decimal number
lexDecNumber -- Start with ordinary digits
lexHexOct :: ReadP Lexeme
lexHexOct
= do _ <- char '0'
base <- lexBaseChar
digits <- lexDigits base
return (Number (MkNumber base digits))
lexBaseChar :: ReadP Int
-- Lex a single character indicating the base; fail if not there
lexBaseChar = do
c <- get
case c of
'b' -> return 2
'B' -> return 2
'o' -> return 8
'O' -> return 8
'x' -> return 16
'X' -> return 16
_ -> pfail
lexDecNumber :: ReadP Lexeme
lexDecNumber =
do xs <- lexDigits 10
mFrac <- lexFrac <++ return Nothing
mExp <- lexExp <++ return Nothing
return (Number (MkDecimal xs mFrac mExp))
lexFrac :: ReadP (Maybe Digits)
-- Read the fractional part; fail if it doesn't
-- start ".d" where d is a digit
lexFrac = do _ <- char '.'
fraction <- lexDigits 10
return (Just fraction)
lexExp :: ReadP (Maybe Integer)
lexExp = do _ <- char 'e' +++ char 'E'
exp <- signedExp +++ lexInteger 10
return (Just exp)
where
signedExp
= do c <- char '-' +++ char '+'
n <- lexInteger 10
return (if c == '-' then -n else n)
lexDigits :: Int -> ReadP Digits
-- Lex a non-empty sequence of digits in specified base
lexDigits base =
do s <- look
xs <- scan s id
guard (not (null xs))
return xs
where
scan (c:cs) f = case valDig base c of
Just n -> do _ <- get; scan cs (f.(n:))
Nothing -> return (f [])
scan [] f = return (f [])
lexInteger :: Base -> ReadP Integer
lexInteger base =
do xs <- lexDigits base
return (val (fromIntegral base) xs)
val :: Num a => a -> Digits -> a
val = valSimple
{-# RULES
"val/Integer" val = valInteger
#-}
{-# INLINE [1] val #-}
-- The following algorithm is only linear for types whose Num operations
-- are in constant time.
valSimple :: (Num a, Integral d) => a -> [d] -> a
valSimple base = go 0
where
go r [] = r
go r (d : ds) = r' `seq` go r' ds
where
r' = r * base + fromIntegral d
{-# INLINE valSimple #-}
-- A sub-quadratic algorithm for Integer. Pairs of adjacent radix b
-- digits are combined into a single radix b^2 digit. This process is
-- repeated until we are left with a single digit. This algorithm
-- performs well only on large inputs, so we use the simple algorithm
-- for smaller inputs.
valInteger :: Integer -> Digits -> Integer
valInteger b0 ds0 = go b0 (length ds0) $ map fromIntegral ds0
where
go _ _ [] = 0
go _ _ [d] = d
go b l ds
| l > 40 = b' `seq` go b' l' (combine b ds')
| otherwise = valSimple b ds
where
-- ensure that we have an even number of digits
-- before we call combine:
ds' = if even l then ds else 0 : ds
b' = b * b
l' = (l + 1) `quot` 2
combine b (d1 : d2 : ds) = d `seq` (d : combine b ds)
where
d = d1 * b + d2
combine _ [] = []
combine _ [_] = errorWithoutStackTrace "this should not happen"
-- Calculate a Rational from the exponent [of 10 to multiply with],
-- the integral part of the mantissa and the digits of the fractional
-- part. Leaving the calculation of the power of 10 until the end,
-- when we know the effective exponent, saves multiplications.
-- More importantly, this way we need at most one gcd instead of three.
--
-- frac was never used with anything but Integer and base 10, so
-- those are hardcoded now (trivial to change if necessary).
fracExp :: Integer -> Integer -> Digits -> Rational
fracExp exp mant []
| exp < 0 = mant % (10 ^ (-exp))
| otherwise = fromInteger (mant * 10 ^ exp)
fracExp exp mant (d:ds) = exp' `seq` mant' `seq` fracExp exp' mant' ds
where
exp' = exp - 1
mant' = mant * 10 + fromIntegral d
valDig :: (Eq a, Num a) => a -> Char -> Maybe Int
valDig 2 c
| '0' <= c && c <= '1' = Just (ord c - ord '0')
| otherwise = Nothing
valDig 8 c
| '0' <= c && c <= '7' = Just (ord c - ord '0')
| otherwise = Nothing
valDig 10 c = valDecDig c
valDig 16 c
| '0' <= c && c <= '9' = Just (ord c - ord '0')
| 'a' <= c && c <= 'f' = Just (ord c - ord 'a' + 10)
| 'A' <= c && c <= 'F' = Just (ord c - ord 'A' + 10)
| otherwise = Nothing
valDig _ _ = errorWithoutStackTrace "valDig: Bad base"
valDecDig :: Char -> Maybe Int
valDecDig c
| '0' <= c && c <= '9' = Just (ord c - ord '0')
| otherwise = Nothing
-- ----------------------------------------------------------------------
-- other numeric lexing functions
readIntP :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
readIntP base isDigit valDigit =
do s <- munch1 isDigit
return (val base (map valDigit s))
{-# SPECIALISE readIntP
:: Integer -> (Char -> Bool) -> (Char -> Int) -> ReadP Integer #-}
readIntP' :: (Eq a, Num a) => a -> ReadP a
readIntP' base = readIntP base isDigit valDigit
where
isDigit c = maybe False (const True) (valDig base c)
valDigit c = maybe 0 id (valDig base c)
{-# SPECIALISE readIntP' :: Integer -> ReadP Integer #-}
readBinP, readOctP, readDecP, readHexP :: (Eq a, Num a) => ReadP a
readBinP = readIntP' 2
readOctP = readIntP' 8
readDecP = readIntP' 10
readHexP = readIntP' 16
{-# SPECIALISE readBinP :: ReadP Integer #-}
{-# SPECIALISE readOctP :: ReadP Integer #-}
{-# SPECIALISE readDecP :: ReadP Integer #-}
{-# SPECIALISE readHexP :: ReadP Integer #-}