ref: 0bd4a33636f6b3dc55da9d84ee3adb3462acb5d2
parent: 1db7ac527ec413001b7a594269e20b57f65d5a41
author: Lennart Augustsson <lennart@augustsson.net>
date: Mon Mar 4 12:32:52 EST 2024
Create location with file name in lexer.
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright 2023 Lennart Augustsson
+Copyright 2023,2024 Lennart Augustsson
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
--- a/src/MicroHs/Ident.hs
+++ b/src/MicroHs/Ident.hs
@@ -1,9 +1,9 @@
-- Copyright 2023 Lennart Augustsson
-- See LICENSE file for full license.
module MicroHs.Ident(
- Line, Col, Loc,
+ Line, Col,
Ident(..),
- mkIdent, mkIdentLoc, unIdent, isIdent,
+ mkIdent, unIdent, isIdent,
qualIdent, showIdent, setSLocIdent,
ppIdent,
mkIdentSLoc,
@@ -23,7 +23,6 @@
type Line = Int
type Col = Int
-type Loc = (Line, Col)
data SLoc = SLoc !FilePath !Line !Col
-- deriving (Eq)
@@ -59,9 +58,6 @@
mkIdentSLoc :: SLoc -> String -> Ident
mkIdentSLoc = Ident
-
-mkIdentLoc :: FilePath -> Loc -> String -> Ident
-mkIdentLoc fn (l, c) s = Ident (SLoc fn l c) s
unIdent :: Ident -> String
unIdent (Ident _ s) = s
--- a/src/MicroHs/Lex.hs
+++ b/src/MicroHs/Lex.hs
@@ -12,20 +12,20 @@
import Compat
data Token
- = TIdent Loc [String] String -- identifier
- | TString Loc String -- String literal
- | TChar Loc Char -- Char literal
- | TInt Loc Integer -- Integer literal
- | TRat Loc Rational -- Rational literal (i.e., decimal number)
- | TSpec Loc Char -- one of ()[]{},`;- -- for synthetic {} we use <>, also- -- . for record selection
- -- ~ for lazy
- -- ! for strict
- -- NOT YET @ for type app
- | TError Loc String -- lexical error
- | TBrace Loc -- {n} in the Haskell report- | TIndent Loc -- <n> in the Haskell report
+ = TIdent SLoc [String] String -- identifier
+ | TString SLoc String -- String literal
+ | TChar SLoc Char -- Char literal
+ | TInt SLoc Integer -- Integer literal
+ | TRat SLoc Rational -- Rational literal (i.e., decimal number)
+ | TSpec SLoc Char -- one of ()[]{},`;+ -- for synthetic {} we use <>, also+ -- . for record selection
+ -- ~ for lazy
+ -- ! for strict
+ -- NOT YET @ for type app
+ | TError SLoc String -- lexical error
+ | TBrace SLoc -- {n} in the Haskell report+ | TIndent SLoc -- <n> in the Haskell report
| TEnd
| TRaw [Token]
deriving (Show)
@@ -45,51 +45,25 @@
showToken TEnd = "EOF"
showToken (TRaw _) = "TRaw"
-incrLine :: Loc -> Loc
-incrLine (l, _) = (l+1, 1)
+incrLine :: SLoc -> SLoc
+incrLine (SLoc f l _) = SLoc f (l+1) 1
-addCol :: Loc -> Int -> Loc
-addCol (l, c) i = (l, c + i)
+addCol :: SLoc -> Int -> SLoc
+addCol (SLoc f l c) i = SLoc f l (c + i)
-tabCol :: Loc -> Loc
-tabCol (l, c) = (l, ((c + 7) `quot` 8) * 8)
+tabCol :: SLoc -> SLoc
+tabCol (SLoc f l c) = SLoc f l (((c + 7) `quot` 8) * 8)
-mkLoc :: Line -> Col -> Loc
-mkLoc l c = (l, c)
+mkLocEOF :: SLoc
+mkLocEOF = SLoc "" (-1) 0
-mkLocEOF :: Loc
-mkLocEOF = (-1,0)
+getCol :: SLoc -> Col
+getCol (SLoc _ _ c) = c
-getCol :: Loc -> Col
-getCol (_, c) = c
-
---getLin :: Loc -> Col
---getLin (l, _) = l
-
-{- This is slower and allocates more.- It needs some strictness, probably
-type Loc = Int
-
-incrLine :: Loc -> Loc
-incrLine l = (quot l 1000000 + 1) * 1000000 + 1
-
-addCol :: Loc -> Int -> Loc
-addCol loc i = loc + i
-
-mkLoc :: Line -> Col -> Loc
-mkLoc l c = l * 1000000 + c
-
-getCol :: Loc -> Col
-getCol loc = rem loc 1000000
-
-getLin :: Loc -> Line
-getLin loc = quot loc 1000000
--}
-
---------
-- | Take a location and string and produce a list of tokens
-lex :: Loc -> String -> [Token]
+lex :: SLoc -> String -> [Token]
lex loc (' ':cs) = lex (addCol loc 1) cs lex loc ('\n':cs) = tIndent (lex (incrLine loc) cs) lex loc ('\r':cs) = lex loc cs@@ -125,12 +99,12 @@
lex loc (d:_) = [TError loc $ "Unrecognized input: " ++ show d]
lex _ [] = []
-hexNumber :: Loc -> String -> [Token]
+hexNumber :: SLoc -> String -> [Token]
hexNumber loc cs =
case span isHexDigit cs of
(ds, rs) -> TInt loc (readHex ds) : lex (addCol loc $ length ds + 2) rs
-number :: Loc -> String -> [Token]
+number :: SLoc -> String -> [Token]
number loc cs =
case span isDigit cs of
(ds, rs) | null rs || not (head rs == '.') || (take 2 rs) == ".." ->
@@ -151,7 +125,7 @@
expo _ = Nothing
-- Skip a {- -} style comment-skipNest :: Loc -> Int -> String -> [Token]
+skipNest :: SLoc -> Int -> String -> [Token]
skipNest loc 0 cs = lex loc cs
skipNest loc n ('{':'-':cs) = skipNest (addCol loc 2) (n + 1) cs skipNest loc n ('-':'}':cs) = skipNest (addCol loc 2) (n - 1) cs@@ -162,7 +136,7 @@
skipNest loc _ [] = [TError loc "Unclosed {- comment"]-- Skip a -- style comment
-skipLine :: Loc -> String -> [Token]
+skipLine :: SLoc -> String -> [Token]
skipLine loc cs@('\n':_) = lex loc csskipLine loc (_:cs) = skipLine loc cs
skipLine _ [] = []
@@ -174,7 +148,7 @@
tIndent ts@(TIndent _ : _) = ts
tIndent ts = TIndent (tokensLoc ts) : ts
-takeChars :: Loc -> (String -> Token) -> Char -> Loc -> String -> String -> (Token, Loc, String)
+takeChars :: SLoc -> (String -> Token) -> Char -> SLoc -> String -> String -> (Token, SLoc, String)
takeChars oloc _ c loc _ [] = (TError oloc ("Unmatched " ++ [c]), loc, []) takeChars oloc fn c loc str ('\\':cs) = let skipGap l (' ' :rs) = skipGap (addCol l 1) rs@@ -232,7 +206,7 @@
where specChars :: String
specChars = "()[],{}`;"-upperIdent :: Loc -> Loc -> [String] -> String -> [Token]
+upperIdent :: SLoc -> SLoc -> [String] -> String -> [Token]
--upperIdent l c qs acs | trace (show (l, c, qs, acs)) False = undefined
upperIdent loc sloc qs acs =
case span isIdentChar acs of
@@ -248,7 +222,7 @@
}
_ -> TIdent sloc (reverse qs) ds : lex (addCol loc $ length ds) rs
-tIdent :: Loc -> [String] -> String -> [Token] -> [Token]
+tIdent :: SLoc -> [String] -> String -> [Token] -> [Token]
tIdent loc qs kw ats | elem kw ["let", "where", "do", "of"]
= ti : tBrace ats
| otherwise = ti : ats
@@ -260,7 +234,7 @@
tBrace (TIndent _ : ts) = TBrace (tokensLoc ts) : ts
tBrace ts = TBrace (tokensLoc ts) : ts
-tokensLoc :: [Token] -> Loc
+tokensLoc :: [Token] -> SLoc
tokensLoc (TIdent loc _ _:_) = loc
tokensLoc (TString loc _ :_) = loc
tokensLoc (TChar loc _ :_) = loc
@@ -332,5 +306,5 @@
where skip (TIndent _ : rs) = rs
skip rs = rs
-lexTopLS :: String -> LexState
-lexTopLS s = LS $ layoutLS (lexStart $ lex (mkLoc 1 1) s) []
+lexTopLS :: FilePath -> String -> LexState
+lexTopLS f s = LS $ layoutLS (lexStart $ lex (SLoc f 1 1) s) []
--- a/src/MicroHs/Parse.hs
+++ b/src/MicroHs/Parse.hs
@@ -9,15 +9,12 @@
import Data.List
import Text.ParserComb as P
import MicroHs.Lex
-import MicroHs.Expr
+import MicroHs.Expr hiding (getSLoc)
import MicroHs.Ident
--import Debug.Trace
-type P a = Prsr FilePath LexState Token a
+type P a = Prsr () LexState Token a
-getFileName :: P FilePath
-getFileName = get
-
parseDie :: forall a . (Show a) =>
P a -> FilePath -> String -> a
parseDie p fn file =
@@ -28,15 +25,15 @@
parse :: forall a . (Show a) =>
P a -> FilePath -> String -> Either String a
parse p fn file =
- let { ts = lexTopLS file } in- case runPrsr fn p ts of
+ let { ts = lexTopLS fn file } in+ case runPrsr () p ts of
Left lf -> Left $ formatFailed fn (tmRawTokens ts) lf
Right [(a, _)] -> Right a
Right as -> Left $ "Ambiguous:"
++ unlines (map (show . fst) as)
-getLoc :: P Loc
-getLoc = do
+getSLoc :: P SLoc
+getSLoc = do
t <- nextToken
pure (tokensLoc [t])
@@ -75,9 +72,8 @@
-- Possibly qualified alphanumeric identifier
pQIdent :: P Ident
pQIdent = do
- fn <- getFileName
let
- is (TIdent loc qs s) | isAlpha_ (head s) = Just (qualName fn loc qs s)
+ is (TIdent loc qs s) | isAlpha_ (head s) = Just (qualName loc qs s)
is _ = Nothing
satisfyM "QIdent" is
@@ -84,9 +80,8 @@
-- Upper case, unqualified, alphanumeric identifier
pUIdentA :: P Ident
pUIdentA = do
- fn <- getFileName
let
- is (TIdent loc [] s) | isUpper (head s) = Just (mkIdentLoc fn loc s)
+ is (TIdent loc [] s) | isUpper (head s) = Just (mkIdentSLoc loc s)
is _ = Nothing
satisfyM "UIdent" is
@@ -103,10 +98,9 @@
-- Special "identifiers": () [] (,) ...
pUIdentSpecial :: P Ident
pUIdentSpecial = do
- fn <- getFileName
- loc <- getLoc
+ loc <- getSLoc
let
- mk = mkIdentLoc fn loc
+ mk = mkIdentSLoc loc
(mk . map (const ',') <$> (pSpec '(' *> esome (pSpec ',') <* pSpec ')')) <|< (mk "()" <$ (pSpec '(' *> pSpec ')')) -- Allow () as a constructor name@@ -115,9 +109,8 @@
-- Upper case, possibly qualified, alphanumeric identifier
pUQIdentA :: P Ident
pUQIdentA = do
- fn <- getFileName
let
- is (TIdent loc qs s) | isUpper (head s) = Just (qualName fn loc qs s)
+ is (TIdent loc qs s) | isUpper (head s) = Just (qualName loc qs s)
is _ = Nothing
satisfyM "UQIdent" is
@@ -130,9 +123,8 @@
-- Lower case, unqualified identifier
pLIdent :: P Ident
pLIdent = do
- fn <- getFileName
let
- is (TIdent loc [] s) | isLower_ (head s) && not (elem s keywords) = Just (mkIdentLoc fn loc s)
+ is (TIdent loc [] s) | isLower_ (head s) && not (elem s keywords) = Just (mkIdentSLoc loc s)
is _ = Nothing
satisfyM "LIdent" is
@@ -139,9 +131,8 @@
-- Lower case, possibly qualified identifier
pLQIdent :: P Ident
pLQIdent = do
- fn <- getFileName
let
- is (TIdent loc qs s) | isLower_ (head s) && not (elem s keywords) = Just (qualName fn loc qs s)
+ is (TIdent loc qs s) | isLower_ (head s) && not (elem s keywords) = Just (qualName loc qs s)
is _ = Nothing
satisfyM "LQIdent" is
@@ -156,7 +147,7 @@
"let", "module", "newtype", "of", "primitive", "then", "type", "where"]
pSpec :: Char -> P ()
-pSpec c = () <$ satisfy (showToken $ TSpec (0,0) c) is
+pSpec c = () <$ satisfy (showToken $ TSpec (SLoc "" 0 0) c) is
where
is (TSpec _ d) = c == d
is _ = False
@@ -175,17 +166,15 @@
pQSymOper :: P Ident
pQSymOper = do
- fn <- getFileName
let
- is (TIdent loc qs s) | not (isAlpha_ (head s)) && not (elem s reservedOps) = Just (qualName fn loc qs s)
+ is (TIdent loc qs s) | not (isAlpha_ (head s)) && not (elem s reservedOps) = Just (qualName loc qs s)
is _ = Nothing
satisfyM "QSymOper" is
pSymOper :: P Ident
pSymOper = do
- fn <- getFileName
let
- is (TIdent loc [] s) | not (isAlpha_ (head s)) && not (elem s reservedOps) = Just (mkIdentLoc fn loc s)
+ is (TIdent loc [] s) | not (isAlpha_ (head s)) && not (elem s reservedOps) = Just (mkIdentSLoc loc s)
is _ = Nothing
satisfyM "SymOper" is
@@ -217,10 +206,9 @@
-- Parse ->, possibly qualified
pQArrow :: P Ident
pQArrow = do
- fn <- getFileName
let
- is (TIdent loc qs s@"->") = Just (qualName fn loc qs s)
- is (TIdent loc qs s@"\x2192") = Just (qualName fn loc qs s)
+ is (TIdent loc qs s@"->") = Just (qualName loc qs s)
+ is (TIdent loc qs s@"\x2192") = Just (qualName loc qs s)
is _ = Nothing
satisfyM "->" is
@@ -248,12 +236,11 @@
pLit :: P Expr
pLit = do
- fn <- getFileName
let
- is (TString (l, c) s) = Just (ELit (SLoc fn l c) (LStr s))
- is (TChar (l, c) a) = Just (ELit (SLoc fn l c) (LChar a))
- is (TInt (l, c) i) = Just (ELit (SLoc fn l c) (LInteger i))
- is (TRat (l, c) d) = Just (ELit (SLoc fn l c) (LRat d))
+ is (TString loc s) = Just (ELit loc (LStr s))
+ is (TChar loc a) = Just (ELit loc (LChar a))
+ is (TInt loc i) = Just (ELit loc (LInteger i))
+ is (TRat loc d) = Just (ELit loc (LRat d))
is _ = Nothing
satisfyM "literal" is
@@ -593,9 +580,8 @@
pQualDo :: P Ident
pQualDo = do
- fn <- getFileName
let
- is (TIdent loc qs@(_:_) "do") = Just (mkIdentLoc fn loc (intercalate "." qs))
+ is (TIdent loc qs@(_:_) "do") = Just (mkIdentSLoc loc (intercalate "." qs))
is _ = Nothing
satisfyM "QualDo" is
@@ -602,7 +588,7 @@
pOperComma :: P Ident
pOperComma = pOper <|< pComma
where
- pComma = mkIdentLoc <$> getFileName <*> getLoc <*> ("," <$ pSpec ',')+ pComma = mkIdentSLoc <$> getSLoc <*> ("," <$ pSpec ',')-- No right section for '-'.
pOperCommaNoMinus :: P Ident
@@ -700,16 +686,15 @@
isAlpha_ :: Char -> Bool
isAlpha_ c = isLower_ c || isUpper c
-qualName :: FilePath -> Loc -> [String] -> String -> Ident
-qualName fn loc qs s = mkIdentLoc fn loc (intercalate "." (qs ++ [s]))
+qualName :: SLoc -> [String] -> String -> Ident
+qualName loc qs s = mkIdentSLoc loc (intercalate "." (qs ++ [s]))
-------------
formatFailed :: String -> [Token] -> LastFail Token -> String
-formatFailed fn _fs (LastFail _ ts msgs) =
+formatFailed _fn _fs (LastFail _ ts msgs) =
let
- (line, col) = tokensLoc ts
- sloc = SLoc fn line col
+ sloc = tokensLoc ts
in
showSLoc sloc ++ ":\n"
++ " found: " ++ head (map showToken ts ++ ["EOF"]) ++ "\n"
--- a/src/Text/ParserComb.hs
+++ b/src/Text/ParserComb.hs
@@ -59,11 +59,9 @@
case tmNextToken tm of
(t, _) -> [t]
-type Res :: Type -> Type -> Type -> Type -> Type
data Res s tm t a = Many [(a, (tm, s))] (LastFail t)
--deriving (Show)
-type Prsr :: Type -> Type -> Type -> Type -> Type
data Prsr s tm t a = P ((tm, s) -> Res s tm t a)
--instance Show (Prsr s t a) where show _ = "<<Prsr>>"
--
⑨