shithub: MicroHs

Download patch

ref: 0bd4a33636f6b3dc55da9d84ee3adb3462acb5d2
parent: 1db7ac527ec413001b7a594269e20b57f65d5a41
author: Lennart Augustsson <lennart@augustsson.net>
date: Mon Mar 4 12:32:52 EST 2024

Create location with file name in lexer.

--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright 2023 Lennart Augustsson
+Copyright 2023,2024 Lennart Augustsson
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
--- a/src/MicroHs/Ident.hs
+++ b/src/MicroHs/Ident.hs
@@ -1,9 +1,9 @@
 -- Copyright 2023 Lennart Augustsson
 -- See LICENSE file for full license.
 module MicroHs.Ident(
-  Line, Col, Loc,
+  Line, Col,
   Ident(..),
-  mkIdent, mkIdentLoc, unIdent, isIdent,
+  mkIdent, unIdent, isIdent,
   qualIdent, showIdent, setSLocIdent,
   ppIdent,
   mkIdentSLoc,
@@ -23,7 +23,6 @@
 
 type Line = Int
 type Col  = Int
-type Loc  = (Line, Col)
 
 data SLoc = SLoc !FilePath !Line !Col
 --  deriving (Eq)
@@ -59,9 +58,6 @@
 
 mkIdentSLoc :: SLoc -> String -> Ident
 mkIdentSLoc = Ident
-
-mkIdentLoc :: FilePath -> Loc -> String -> Ident
-mkIdentLoc fn (l, c) s = Ident (SLoc fn l c) s
 
 unIdent :: Ident -> String
 unIdent (Ident _ s) = s
--- a/src/MicroHs/Lex.hs
+++ b/src/MicroHs/Lex.hs
@@ -12,20 +12,20 @@
 import Compat
 
 data Token
-  = TIdent  Loc [String] String  -- identifier
-  | TString Loc String           -- String literal
-  | TChar   Loc Char             -- Char literal
-  | TInt    Loc Integer          -- Integer literal
-  | TRat    Loc Rational         -- Rational literal (i.e., decimal number)
-  | TSpec   Loc Char             -- one of ()[]{},`;
-                                 -- for synthetic {} we use <>, also
-                                 --  .  for record selection
-                                 --  ~  for lazy
-                                 --  !  for strict
-                                 --  NOT YET  @  for type app
-  | TError  Loc String           -- lexical error
-  | TBrace  Loc                  -- {n} in the Haskell report
-  | TIndent Loc                  -- <n> in the Haskell report
+  = TIdent  SLoc [String] String  -- identifier
+  | TString SLoc String           -- String literal
+  | TChar   SLoc Char             -- Char literal
+  | TInt    SLoc Integer          -- Integer literal
+  | TRat    SLoc Rational         -- Rational literal (i.e., decimal number)
+  | TSpec   SLoc Char             -- one of ()[]{},`;
+                                  -- for synthetic {} we use <>, also
+                                  --  .  for record selection
+                                  --  ~  for lazy
+                                  --  !  for strict
+                                  --  NOT YET  @  for type app
+  | TError  SLoc String           -- lexical error
+  | TBrace  SLoc                  -- {n} in the Haskell report
+  | TIndent SLoc                  -- <n> in the Haskell report
   | TEnd
   | TRaw [Token]
   deriving (Show)
@@ -45,51 +45,25 @@
 showToken TEnd = "EOF"
 showToken (TRaw _) = "TRaw"
 
-incrLine :: Loc -> Loc
-incrLine (l, _) = (l+1, 1)
+incrLine :: SLoc -> SLoc
+incrLine (SLoc f l _) = SLoc f (l+1) 1
 
-addCol :: Loc -> Int -> Loc
-addCol (l, c) i = (l, c + i)
+addCol :: SLoc -> Int -> SLoc
+addCol (SLoc f l c) i = SLoc f l (c + i)
 
-tabCol :: Loc -> Loc
-tabCol (l, c) = (l, ((c + 7) `quot` 8) * 8)
+tabCol :: SLoc -> SLoc
+tabCol (SLoc f l c) = SLoc f l (((c + 7) `quot` 8) * 8)
 
-mkLoc :: Line -> Col -> Loc
-mkLoc l c = (l, c)
+mkLocEOF :: SLoc
+mkLocEOF = SLoc "" (-1) 0
 
-mkLocEOF :: Loc
-mkLocEOF = (-1,0)
+getCol :: SLoc -> Col
+getCol (SLoc _ _ c) = c
 
-getCol :: Loc -> Col
-getCol (_, c) = c
-
---getLin :: Loc -> Col
---getLin (l, _) = l
-
-{-  This is slower and allocates more.
-    It needs some strictness, probably
-type Loc = Int
-
-incrLine :: Loc -> Loc
-incrLine l = (quot l 1000000 + 1) * 1000000 + 1
-
-addCol :: Loc -> Int -> Loc
-addCol loc i = loc + i
-
-mkLoc :: Line -> Col -> Loc
-mkLoc l c = l * 1000000 + c
-
-getCol :: Loc -> Col
-getCol loc = rem loc 1000000
-
-getLin :: Loc -> Line
-getLin loc = quot loc 1000000
--}
-
 ---------
 
 -- | Take a location and string and produce a list of tokens
-lex :: Loc -> String -> [Token]
+lex :: SLoc -> String -> [Token]
 lex loc (' ':cs)  = lex (addCol loc 1) cs
 lex loc ('\n':cs) = tIndent (lex (incrLine loc) cs)
 lex loc ('\r':cs) = lex loc cs
@@ -125,12 +99,12 @@
 lex loc (d:_) = [TError loc $ "Unrecognized input: " ++ show d]
 lex _ [] = []
 
-hexNumber :: Loc -> String -> [Token]
+hexNumber :: SLoc -> String -> [Token]
 hexNumber loc cs =
   case span isHexDigit cs of
     (ds, rs) -> TInt loc (readHex ds) : lex (addCol loc $ length ds + 2) rs
 
-number :: Loc -> String -> [Token]
+number :: SLoc -> String -> [Token]
 number loc cs =
   case span isDigit cs of
     (ds, rs) | null rs || not (head rs == '.') || (take 2 rs) == ".." ->
@@ -151,7 +125,7 @@
     expo _ = Nothing
 
 -- Skip a {- -} style comment
-skipNest :: Loc -> Int -> String -> [Token]
+skipNest :: SLoc -> Int -> String -> [Token]
 skipNest loc 0 cs           = lex loc cs
 skipNest loc n ('{':'-':cs) = skipNest (addCol loc 2) (n + 1) cs
 skipNest loc n ('-':'}':cs) = skipNest (addCol loc 2) (n - 1) cs
@@ -162,7 +136,7 @@
 skipNest loc _ []           = [TError loc "Unclosed {- comment"]
 
 -- Skip a -- style comment
-skipLine :: Loc -> String -> [Token]
+skipLine :: SLoc -> String -> [Token]
 skipLine loc cs@('\n':_) = lex loc cs
 skipLine loc (_:cs)      = skipLine loc cs
 skipLine   _ []          = []
@@ -174,7 +148,7 @@
 tIndent ts@(TIndent _ : _) = ts
 tIndent ts = TIndent (tokensLoc ts) : ts
 
-takeChars :: Loc -> (String -> Token) -> Char -> Loc -> String -> String -> (Token, Loc, String)
+takeChars :: SLoc -> (String -> Token) -> Char -> SLoc -> String -> String -> (Token, SLoc, String)
 takeChars oloc  _ c loc _ [] = (TError oloc ("Unmatched " ++ [c]), loc, [])
 takeChars oloc fn c loc str ('\\':cs) =
   let skipGap l (' ' :rs) = skipGap (addCol l 1) rs
@@ -232,7 +206,7 @@
   where specChars :: String
         specChars = "()[],{}`;"
 
-upperIdent :: Loc -> Loc -> [String] -> String -> [Token]
+upperIdent :: SLoc -> SLoc -> [String] -> String -> [Token]
 --upperIdent l c qs acs | trace (show (l, c, qs, acs)) False = undefined
 upperIdent loc sloc qs acs =
   case span isIdentChar acs of
@@ -248,7 +222,7 @@
            }
       _ -> TIdent sloc (reverse qs) ds : lex (addCol loc $ length ds) rs
 
-tIdent :: Loc -> [String] -> String -> [Token] -> [Token]
+tIdent :: SLoc -> [String] -> String -> [Token] -> [Token]
 tIdent loc qs kw ats | elem kw ["let", "where", "do", "of"]
                                  = ti : tBrace ats
                      | otherwise = ti : ats
@@ -260,7 +234,7 @@
     tBrace (TIndent _ : ts) = TBrace (tokensLoc ts) : ts
     tBrace ts = TBrace (tokensLoc ts) : ts
 
-tokensLoc :: [Token] -> Loc
+tokensLoc :: [Token] -> SLoc
 tokensLoc (TIdent  loc _ _:_) = loc
 tokensLoc (TString loc _  :_) = loc
 tokensLoc (TChar   loc _  :_) = loc
@@ -332,5 +306,5 @@
   where skip (TIndent _ : rs) = rs
         skip rs = rs
 
-lexTopLS :: String -> LexState
-lexTopLS s = LS $ layoutLS (lexStart $ lex (mkLoc 1 1) s) []
+lexTopLS :: FilePath -> String -> LexState
+lexTopLS f s = LS $ layoutLS (lexStart $ lex (SLoc f 1 1) s) []
--- a/src/MicroHs/Parse.hs
+++ b/src/MicroHs/Parse.hs
@@ -9,15 +9,12 @@
 import Data.List
 import Text.ParserComb as P
 import MicroHs.Lex
-import MicroHs.Expr
+import MicroHs.Expr hiding (getSLoc)
 import MicroHs.Ident
 --import Debug.Trace
 
-type P a = Prsr FilePath LexState Token a
+type P a = Prsr () LexState Token a
 
-getFileName :: P FilePath
-getFileName = get
-
 parseDie :: forall a . (Show a) =>
             P a -> FilePath -> String -> a
 parseDie p fn file =
@@ -28,15 +25,15 @@
 parse :: forall a . (Show a) =>
          P a -> FilePath -> String -> Either String a
 parse p fn file =
-  let { ts = lexTopLS file } in
-  case runPrsr fn p ts of
+  let { ts = lexTopLS fn file } in
+  case runPrsr () p ts of
     Left lf -> Left $ formatFailed fn (tmRawTokens ts) lf
     Right [(a, _)] -> Right a
     Right as -> Left $ "Ambiguous:"
                        ++ unlines (map (show . fst) as)
 
-getLoc :: P Loc
-getLoc = do
+getSLoc :: P SLoc
+getSLoc = do
   t <- nextToken
   pure (tokensLoc [t])
 
@@ -75,9 +72,8 @@
 -- Possibly qualified alphanumeric identifier
 pQIdent :: P Ident
 pQIdent = do
-  fn <- getFileName
   let
-    is (TIdent loc qs s) | isAlpha_ (head s) = Just (qualName fn loc qs s)
+    is (TIdent loc qs s) | isAlpha_ (head s) = Just (qualName loc qs s)
     is _ = Nothing
   satisfyM "QIdent" is
 
@@ -84,9 +80,8 @@
 -- Upper case, unqualified, alphanumeric identifier
 pUIdentA :: P Ident
 pUIdentA = do
-  fn <- getFileName
   let
-    is (TIdent loc [] s) | isUpper (head s) = Just (mkIdentLoc fn loc s)
+    is (TIdent loc [] s) | isUpper (head s) = Just (mkIdentSLoc loc s)
     is _ = Nothing
   satisfyM "UIdent" is
 
@@ -103,10 +98,9 @@
 -- Special "identifiers": () [] (,) ...
 pUIdentSpecial :: P Ident
 pUIdentSpecial = do
-  fn <- getFileName
-  loc <- getLoc
+  loc <- getSLoc
   let
-    mk = mkIdentLoc fn loc
+    mk = mkIdentSLoc loc
   
   (mk . map (const ',') <$> (pSpec '(' *> esome (pSpec ',') <* pSpec ')'))
     <|< (mk "()" <$ (pSpec '(' *> pSpec ')'))  -- Allow () as a constructor name
@@ -115,9 +109,8 @@
 -- Upper case, possibly qualified, alphanumeric identifier
 pUQIdentA :: P Ident
 pUQIdentA = do
-  fn <- getFileName
   let
-    is (TIdent loc qs s) | isUpper (head s) = Just (qualName fn loc qs s)
+    is (TIdent loc qs s) | isUpper (head s) = Just (qualName loc qs s)
     is _ = Nothing
   satisfyM "UQIdent" is
 
@@ -130,9 +123,8 @@
 -- Lower case, unqualified identifier
 pLIdent :: P Ident
 pLIdent = do
-  fn <- getFileName
   let
-    is (TIdent loc [] s) | isLower_ (head s) && not (elem s keywords) = Just (mkIdentLoc fn loc s)
+    is (TIdent loc [] s) | isLower_ (head s) && not (elem s keywords) = Just (mkIdentSLoc loc s)
     is _ = Nothing
   satisfyM "LIdent" is
 
@@ -139,9 +131,8 @@
 -- Lower case, possibly qualified identifier
 pLQIdent :: P Ident
 pLQIdent = do
-  fn <- getFileName
   let
-    is (TIdent loc qs s) | isLower_ (head s) && not (elem s keywords) = Just (qualName fn loc qs s)
+    is (TIdent loc qs s) | isLower_ (head s) && not (elem s keywords) = Just (qualName loc qs s)
     is _ = Nothing
   satisfyM "LQIdent" is
 
@@ -156,7 +147,7 @@
    "let", "module", "newtype", "of", "primitive", "then", "type", "where"]
 
 pSpec :: Char -> P ()
-pSpec c = () <$ satisfy (showToken $ TSpec (0,0) c) is
+pSpec c = () <$ satisfy (showToken $ TSpec (SLoc "" 0 0) c) is
   where
     is (TSpec _ d) = c == d
     is _ = False
@@ -175,17 +166,15 @@
 
 pQSymOper :: P Ident
 pQSymOper = do
-  fn <- getFileName
   let
-    is (TIdent loc qs s) | not (isAlpha_ (head s)) && not (elem s reservedOps) = Just (qualName fn loc qs s)
+    is (TIdent loc qs s) | not (isAlpha_ (head s)) && not (elem s reservedOps) = Just (qualName loc qs s)
     is _ = Nothing
   satisfyM "QSymOper" is
 
 pSymOper :: P Ident
 pSymOper = do
-  fn <- getFileName
   let
-    is (TIdent loc [] s) | not (isAlpha_ (head s)) && not (elem s reservedOps) = Just (mkIdentLoc fn loc s)
+    is (TIdent loc [] s) | not (isAlpha_ (head s)) && not (elem s reservedOps) = Just (mkIdentSLoc loc s)
     is _ = Nothing
   satisfyM "SymOper" is
 
@@ -217,10 +206,9 @@
 -- Parse ->, possibly qualified
 pQArrow :: P Ident
 pQArrow = do
-  fn <- getFileName
   let
-    is (TIdent loc qs s@"->") = Just (qualName fn loc qs s)
-    is (TIdent loc qs s@"\x2192") = Just (qualName fn loc qs s)
+    is (TIdent loc qs s@"->") = Just (qualName loc qs s)
+    is (TIdent loc qs s@"\x2192") = Just (qualName loc qs s)
     is _ = Nothing
   satisfyM "->" is
 
@@ -248,12 +236,11 @@
 
 pLit :: P Expr
 pLit = do
-  fn <- getFileName
   let
-    is (TString (l, c) s) = Just (ELit (SLoc fn l c) (LStr s))
-    is (TChar   (l, c) a) = Just (ELit (SLoc fn l c) (LChar a))
-    is (TInt    (l, c) i) = Just (ELit (SLoc fn l c) (LInteger i))
-    is (TRat    (l, c) d) = Just (ELit (SLoc fn l c) (LRat d))
+    is (TString loc s) = Just (ELit loc (LStr s))
+    is (TChar   loc a) = Just (ELit loc (LChar a))
+    is (TInt    loc i) = Just (ELit loc (LInteger i))
+    is (TRat    loc d) = Just (ELit loc (LRat d))
     is _ = Nothing
   satisfyM "literal" is
 
@@ -593,9 +580,8 @@
 
 pQualDo :: P Ident
 pQualDo = do
-  fn <- getFileName
   let
-    is (TIdent loc qs@(_:_) "do") = Just (mkIdentLoc fn loc (intercalate "." qs))
+    is (TIdent loc qs@(_:_) "do") = Just (mkIdentSLoc loc (intercalate "." qs))
     is _ = Nothing
   satisfyM "QualDo" is
 
@@ -602,7 +588,7 @@
 pOperComma :: P Ident
 pOperComma = pOper <|< pComma
   where
-    pComma = mkIdentLoc <$> getFileName <*> getLoc <*> ("," <$ pSpec ',')
+    pComma = mkIdentSLoc <$> getSLoc <*> ("," <$ pSpec ',')
 
 -- No right section for '-'.
 pOperCommaNoMinus :: P Ident
@@ -700,16 +686,15 @@
 isAlpha_ :: Char -> Bool
 isAlpha_ c = isLower_ c || isUpper c
 
-qualName :: FilePath -> Loc -> [String] -> String -> Ident
-qualName fn loc qs s = mkIdentLoc fn loc (intercalate "." (qs ++ [s]))
+qualName :: SLoc -> [String] -> String -> Ident
+qualName loc qs s = mkIdentSLoc loc (intercalate "." (qs ++ [s]))
 
 -------------
 
 formatFailed :: String -> [Token] -> LastFail Token -> String
-formatFailed fn _fs (LastFail _ ts msgs) =
+formatFailed _fn _fs (LastFail _ ts msgs) =
   let
-    (line, col) = tokensLoc ts
-    sloc = SLoc fn line col
+    sloc = tokensLoc ts
   in
     showSLoc sloc ++ ":\n"
       ++ "  found:    " ++ head (map showToken ts ++ ["EOF"]) ++ "\n"
--- a/src/Text/ParserComb.hs
+++ b/src/Text/ParserComb.hs
@@ -59,11 +59,9 @@
   case tmNextToken tm of
     (t, _) -> [t]
 
-type Res :: Type -> Type -> Type -> Type -> Type
 data Res s tm t a = Many [(a, (tm, s))] (LastFail t)
   --deriving (Show)
 
-type Prsr :: Type -> Type -> Type -> Type -> Type
 data Prsr s tm t a = P ((tm, s) -> Res s tm t a)
 --instance Show (Prsr s t a) where show _ = "<<Prsr>>"
 
--