shithub: MicroHs

Download patch

ref: f1a9ba4b5b5af7283308387d0ead1aaf7a0855cc
parent: e94e1019b438c6a9a8361d7580ba7aebc62ce476
author: Lennart Augustsson <lennart.augustsson@epicgames.com>
date: Mon Sep 25 13:24:14 EDT 2023

Simplify and improve lexer a little.

--- a/README.md
+++ b/README.md
@@ -37,7 +37,7 @@
 * case expressions
 * let expressions
 * tuples
-* list syntax (for stuff like `[x..y]` you unfortunately need to write `[x .. y]`, as the parsers support for `Double` literals is simple)
+* list syntax
 * list comprehensions
 * arithmetic and comparison operators, the prelude exports the ones for `Int`, but for the other types you need to do a qulified import (e.g for `Double` and for `Word`).
 * qualified `do` notation, e.g., `IO.do`
--- a/comb/mhs.comb
+++ b/comb/mhs.comb
@@ -1,3 +1,3 @@
 v3.5
-925
-(($A :0 _810) (($A :1 (($B _856) _0)) (($A :2 ((($S' _856) _0) $I)) (($A :3 _780) (($A :4 (_3 "undefined")) (($A :5 $I) (($A :6 ((($C' $B) _809) (($C _71) _5))) (($A :7 ((($C' _6) (_827 _68)) ((_71 _825) _67))) (($A :8 (($B (($S _856) _825)) _3)) (($A :9 $T) (($A :10 ($T $I)) (($A :11 (($B (_71 _183)) _10)) (($A :12 (($B ($B (_70 _9))) ((($C' $B) (($B $C) _10)) ($B _10)))) (($A :13 (($B ($B (_70 _9))) ((($C' $B) (($B $C) _10)) ($BK _10)))) (($A :14 (($B (_70 _9)) $P)) (($A :15 (($B ($B (_70 _9))) (($B (($C' $C) _10)) ($B $P)))) (($A :16 _15) (($A :17 (($B (_70 _9)) ($B ($P _738)))) (($A :18 (($B (_70 _9)) ($BK ($P _738)))) (($A :19 ((_70 _9) (($S $P) $I))) (($A :20 (($B (_70 _9)) (($C ($S' $P)) $I))) (($A :21 (($B $Y) (($B ($B ($P (_14 _111)))) ((($C' $B) (($B ($C' $B)) ($B _12))) ((($C' ($C' $B)) ($B _12)) (($B ($B _14)) _112)))))) (($A :22 (($B $Y) (($B ($B ($P (_14 _738)))) (($B ($C' $B)) ($B _13))))) (($A :23 _3) (($A :24 ($T (_14 _738))) (($A :25 (($C $C) _32)) (($A :26 ($T _31)) (($A :27 (($P _32) _31)) (($A :28 _32) (($A :29 (($C (($C $S') _27)) $I)) (($A :30 (($C $S) _27)) (($A :31 $K) (($A :32 $A) (($A :33 _785) (($A :34 _786) (($A :35 ((($S' _26) (_777 97)) (($C _777) 122))) (($A :36 ((($S' _26) (_777 65)) (($C _777) 90))) (($A :37 ((($S' _25) _35) _36)) (($A :38 ((($S' _26) (_777 48)) (($C _777) 57))) (($A :39 ((($S' _26) (_777 32)) (($C _777) 126))) (($A :40 _774) (($A :41 _775) (($A :42 _777) (($A :43 _776) (($A :44 ((($S' _25) (($C _40) 32)) ((($S' _25) (($C _40) 9)) (($C _40) 10)))) (($A :45 _745) (($A :46 _746) (($A :47 _747) (($A :48 _748) (($A :49 (_46 %0.0)) (($A :50 _45) (($A :51 _46) (($A :52 _47) (($A :53 _48) (($A :54 _749) (($A :55 _750) (($A :56 _54) (($A :57 _55) (($A :58 _751) (($A :59 _752) (($A :60 _753) (($A :61 _754) (($A :62 _58) (($A :63 _59) (($A :64 _60) (($A :65 _61) (($A :66 _755) (($A :67 (($B $BK) $T)) (($A :68 ($BK $T)) (($A :69 $P) (($A :70 $I) (($A :71 $B) (($A :72 $I) (($A :73 $K) (($A :74 $C) (($A :75 _781) (($A :76 (($C (($C $S') _183)) _184)) (($A :77 ((($C' ($S' ($C' $B))) $B) $I)) (($A :78 _739) (($A :79 _740) (($A :80 _741) (($A :81 _742) (($A :82 _743) (($A :83 _744) (($A :84 (_79 0)) (($A :85 _762) (($A :86 _763) (($A :87 _764) (($A :88 _765) (($A :89 _766) (($A :90 _767) (($A :91 _85) (($A :92 ($BK $K)) (($A :93 (($B $BK) (($B ($B $BK)) $P))) (($A :94 (($B ($B ($B $BK))) (($B ($B ($B $BK))) (($B ($B ($B $C))) (($B ($B $C)) $P))))) (($A :95 ((($S' $S) ((($S' ($S' $C)) ((($C' ($C' $S)) ((($C' $B) (($B ($S' $S')) ((($C' $B) (($B _25) (_88 0))) (_85 0)))) (($B ($B (($C' $P) (_83 1)))) _78))) ($C $P))) _81)) _82)) (($A :96 _92) (($A :97 ((($S' $C) (($B ($P _172)) ((($C' ($C' $B)) ((($C' $C) _85) _172)) _173))) (($B (($C' ($C' ($C' $C))) ((($C' ($C' ($C' $C))) ((($C' ($C' ($C' ($C' $S')))) (($B ($B ($B ($B $C)))) (($B (($C' ($C' ($C' $C))) (($B ($B ($B (($S' $S') (_85 0))))) (($B (($C' ($C' $C)) (($B ($B (($S' $S') (_85 1)))) (($B (($C' $C) (($B (($C' $S') (_85 2))) ($C _97)))) ($C _97))))) ($C _97))))) ($C _97)))) ($T $K))) ($T $A)))) (($C _95) 4)))) (($A :98 (_104 _73)) (($A :99 ((_119 (_76 _98)) _96)) (($A :100 (($C ((($C' $B) (($P _111) ((($C' ($C' $O)) $P) $K))) ((($S' ($C' ($C' ($C' $B)))) (($B ($B ($B ($B _101)))) ((($S' ($C' ($C' $B))) (($B ($B ($B _101))) ((($S' ($C' $B)) (($B ($B _101)) ((($C' $B) (($B _117) ($T 0))) _100))) ((($C' $B) (($B _117) ($T 1))) _100)))) ((($C' $B) (($B _117) ($T 2))) _100)))) ((($C' $B) (($B _117) ($T 3))) _100)))) (($B $T) (($B ($B $P)) (($C' _78) (_80 4)))))) (($A :101 (($S $S) (($B $BK) (($B $BK) ((($S' $S) $T) (($B $BK) (($B $BK) (($C ((($S' $C') $S) (($B ($B ($B ($S $B)))) (($B ($B ($B ($B ($B $BK))))) (($B (($S' ($C' $B)) (($B $B') $B'))) (($B ($B ($B ($B ($B ($S $B)))))) (($B ($B ($B ($B ($B ($B ($B $BK))))))) ((($C' $B) ($B' ($B' (($B ($C' ($C' ($C' $C)))) (($B (($C' $B) ($B' (($B $C) _87)))) (($B (($C' $B) _112)) _101)))))) (($B (($C' $B) _112)) ($C _101)))))))))) (((_737 "lib/Data/IntMap.hs") 3) 8))))))))) (($A :102 ((_71 (_117 _183)) _100)) (($A :103 ((($C' $C) ((($C' $C) ($C _97)) (_3 "Data.IntMap.!"))) $I)) (($A :104 (($B ((
\ No newline at end of file
+926
+(($A :0 _811) (($A :1 (($B _857) _0)) (($A :2 ((($S' _857) _0) $I)) (($A :3 _781) (($A :4 (_3 "undefined")) (($A :5 $I) (($A :6 ((($C' $B) _810) (($C _71) _5))) (($A :7 ((($C' _6) (_828 _68)) ((_71 _826) _67))) (($A :8 (($B (($S _857) _826)) _3)) (($A :9 $T) (($A :10 ($T $I)) (($A :11 (($B (_71 _183)) _10)) (($A :12 (($B ($B (_70 _9))) ((($C' $B) (($B $C) _10)) ($B _10)))) (($A :13 (($B ($B (_70 _9))) ((($C' $B) (($B $C) _10)) ($BK _10)))) (($A :14 (($B (_70 _9)) $P)) (($A :15 (($B ($B (_70 _9))) (($B (($C' $C) _10)) ($B $P)))) (($A :16 _15) (($A :17 (($B (_70 _9)) ($B ($P _739)))) (($A :18 (($B (_70 _9)) ($BK ($P _739)))) (($A :19 ((_70 _9) (($S $P) $I))) (($A :20 (($B (_70 _9)) (($C ($S' $P)) $I))) (($A :21 (($B $Y) (($B ($B ($P (_14 _111)))) ((($C' $B) (($B ($C' $B)) ($B _12))) ((($C' ($C' $B)) ($B _12)) (($B ($B _14)) _112)))))) (($A :22 (($B $Y) (($B ($B ($P (_14 _739)))) (($B ($C' $B)) ($B _13))))) (($A :23 _3) (($A :24 ($T (_14 _739))) (($A :25 (($C $C) _32)) (($A :26 ($T _31)) (($A :27 (($P _32) _31)) (($A :28 _32) (($A :29 (($C (($C $S') _27)) $I)) (($A :30 (($C $S) _27)) (($A :31 $K) (($A :32 $A) (($A :33 _786) (($A :34 _787) (($A :35 ((($S' _26) (_778 97)) (($C _778) 122))) (($A :36 ((($S' _26) (_778 65)) (($C _778) 90))) (($A :37 ((($S' _25) _35) _36)) (($A :38 ((($S' _26) (_778 48)) (($C _778) 57))) (($A :39 ((($S' _26) (_778 32)) (($C _778) 126))) (($A :40 _775) (($A :41 _776) (($A :42 _778) (($A :43 _777) (($A :44 ((($S' _25) (($C _40) 32)) ((($S' _25) (($C _40) 9)) (($C _40) 10)))) (($A :45 _746) (($A :46 _747) (($A :47 _748) (($A :48 _749) (($A :49 (_46 %0.0)) (($A :50 _45) (($A :51 _46) (($A :52 _47) (($A :53 _48) (($A :54 _750) (($A :55 _751) (($A :56 _54) (($A :57 _55) (($A :58 _752) (($A :59 _753) (($A :60 _754) (($A :61 _755) (($A :62 _58) (($A :63 _59) (($A :64 _60) (($A :65 _61) (($A :66 _756) (($A :67 (($B $BK) $T)) (($A :68 ($BK $T)) (($A :69 $P) (($A :70 $I) (($A :71 $B) (($A :72 $I) (($A :73 $K) (($A :74 $C) (($A :75 _782) (($A :76 (($C (($C $S') _183)) _184)) (($A :77 ((($C' ($S' ($C' $B))) $B) $I)) (($A :78 _740) (($A :79 _741) (($A :80 _742) (($A :81 _743) (($A :82 _744) (($A :83 _745) (($A :84 (_79 0)) (($A :85 _763) (($A :86 _764) (($A :87 _765) (($A :88 _766) (($A :89 _767) (($A :90 _768) (($A :91 _85) (($A :92 ($BK $K)) (($A :93 (($B $BK) (($B ($B $BK)) $P))) (($A :94 (($B ($B ($B $BK))) (($B ($B ($B $BK))) (($B ($B ($B $C))) (($B ($B $C)) $P))))) (($A :95 ((($S' $S) ((($S' ($S' $C)) ((($C' ($C' $S)) ((($C' $B) (($B ($S' $S')) ((($C' $B) (($B _25) (_88 0))) (_85 0)))) (($B ($B (($C' $P) (_83 1)))) _78))) ($C $P))) _81)) _82)) (($A :96 _92) (($A :97 ((($S' $C) (($B ($P _172)) ((($C' ($C' $B)) ((($C' $C) _85) _172)) _173))) (($B (($C' ($C' ($C' $C))) ((($C' ($C' ($C' $C))) ((($C' ($C' ($C' ($C' $S')))) (($B ($B ($B ($B $C)))) (($B (($C' ($C' ($C' $C))) (($B ($B ($B (($S' $S') (_85 0))))) (($B (($C' ($C' $C)) (($B ($B (($S' $S') (_85 1)))) (($B (($C' $C) (($B (($C' $S') (_85 2))) ($C _97)))) ($C _97))))) ($C _97))))) ($C _97)))) ($T $K))) ($T $A)))) (($C _95) 4)))) (($A :98 (_104 _73)) (($A :99 ((_119 (_76 _98)) _96)) (($A :100 (($C ((($C' $B) (($P _111) ((($C' ($C' $O)) $P) $K))) ((($S' ($C' ($C' ($C' $B)))) (($B ($B ($B ($B _101)))) ((($S' ($C' ($C' $B))) (($B ($B ($B _101))) ((($S' ($C' $B)) (($B ($B _101)) ((($C' $B) (($B _117) ($T 0))) _100))) ((($C' $B) (($B _117) ($T 1))) _100)))) ((($C' $B) (($B _117) ($T 2))) _100)))) ((($C' $B) (($B _117) ($T 3))) _100)))) (($B $T) (($B ($B $P)) (($C' _78) (_80 4)))))) (($A :101 (($S $S) (($B $BK) (($B $BK) ((($S' $S) $T) (($B $BK) (($B $BK) (($C ((($S' $C') $S) (($B ($B ($B ($S $B)))) (($B ($B ($B ($B ($B $BK))))) (($B (($S' ($C' $B)) (($B $B') $B'))) (($B ($B ($B ($B ($B ($S $B)))))) (($B ($B ($B ($B ($B ($B ($B $BK))))))) ((($C' $B) ($B' ($B' (($B ($C' ($C' ($C' $C)))) (($B (($C' $B) ($B' (($B $C) _87)))) (($B (($C' $B) _112)) _101)))))) (($B (($C' $B) _112)) ($C _101)))))))))) (((_738 "lib/Data/IntMap.hs") 3) 8))))))))) (($A :102 ((_71 (_117 _183)) _100)) (($A :103 ((($C' $C) ((($C' $C) ($C _97)) (_3 "Data.IntMap.!"))) $I)) (($A :104 (($B ((
\ No newline at end of file
--- a/ghc/Data/Double.hs
+++ b/ghc/Data/Double.hs
@@ -1,4 +1,4 @@
-module Data.Double(Double, showDouble) where
+module Data.Double(Double, showDouble, negate) where
 
 showDouble :: Double -> [Char]
-showDouble = show
\ No newline at end of file
+showDouble = show
--- a/src/MicroHs/Desugar.hs
+++ b/src/MicroHs/Desugar.hs
@@ -154,7 +154,7 @@
       n = length is
       ev = Var v
       one m i = letE i (mkTupleSel m n ev)
-      bnds = foldr (.) id $ zipWith one [0 .. ] is
+      bnds = foldr (.) id $ zipWith one [0..] is
   in  letRecE v (bnds $ mkTuple es) $
       bnds body
 
--- a/src/MicroHs/Graph.hs
+++ b/src/MicroHs/Graph.hs
@@ -103,7 +103,7 @@
 
     max_v           = length edges0 - 1
     sorted_edges    = sortLE lek edges0
-    edges1          = zip [0 .. ] sorted_edges
+    edges1          = zip [0..] sorted_edges
 
     key_map         = IM.fromList [(v, k)                      | (v, (_,    k, _ )) <- edges1]
 
--- a/src/MicroHs/Lex.hs
+++ b/src/MicroHs/Lex.hs
@@ -88,16 +88,8 @@
   case span isIdentChar cs of
     (ds, rs) -> tIdent loc [] (d:ds) (lex (addCol loc $ 1 + length ds) rs)
 lex loc cs@(d:_) | isUpper d = upperIdent loc loc [] cs
-lex loc ('-':d:cs) | isDigit d =
-  case span isDigit cs of
-    (ds, rs) | null rs || not (eqChar (head rs) '.') -> TInt loc (readInt ('-':d:ds)) : lex (addCol loc $ 2 + length ds) rs
-             | otherwise -> case span isDigit (tail rs) of
-      (ns, rs') -> TDouble loc (readDouble (('-':d:ds) ++ ('.':ns))) : lex (addCol loc $ 3 + length ds + length ns) rs'
-lex loc (d:cs) | isDigit d =
-  case span isDigit cs of
-    (ds, rs) | null rs || not (eqChar (head rs) '.') -> TInt loc (readInt (d:ds)) : lex (addCol loc $ 1 + length ds) rs
-             | otherwise -> case span isDigit (tail rs) of
-      (ns, rs') -> TDouble loc (readDouble ((d:ds) ++ ('.':ns))) : lex (addCol loc $ 2 + length ds + length ns) rs' 
+lex loc ('-':cs@(d:_)) | isDigit d = number loc 1 cs
+lex loc      cs@(d:_)  | isDigit d = number loc 0 cs
 lex loc (d:cs) | isOperChar d  =
   case span isOperChar cs of
     (ds, rs) -> TIdent loc [] (d:ds) : lex (addCol loc $ 1 + length ds) rs
@@ -113,6 +105,21 @@
 lex loc (d:_) = [TError loc $ "Unrecognized input: " ++ showChar d]
 lex _ [] = []
 
+number :: Loc -> Int -> String -> [Token]   -- neg=1 means negative, neg=0 means positive
+number loc neg cs =
+  case span isDigit cs of
+    (ds, rs) | null rs || not (eqChar (head rs) '.') || eqString (take 2 rs) ".." ->
+               let i = readInt ds
+                   i' = if neg == 0 then i else negate i
+               in  TInt loc i' : lex (addCol loc $ neg + 1 + length ds) rs
+             | otherwise ->
+               case span isDigit (tail rs) of
+                 (ns, rs') ->
+                   let d = readDouble (ds ++ ('.':ns))
+                       d' = if neg == 0 then d else D.negate d
+                   in  TDouble loc d' : lex (addCol loc $ neg + 2 + length ds + length ns) rs' 
+
+
 -- Skip a {- -} style comment
 skipNest :: Loc -> Int -> String -> [Token]
 skipNest loc 0 cs           = lex loc cs
@@ -196,7 +203,7 @@
 tokensLoc (TIndent loc    :_) = loc
 tokensLoc []                  = mkLoc 0 1
 
--- | This appears to be the magical layout resolver, I wondered where it was...
+-- | This is the magical layout resolver, straight from the Haskell report.
 layout :: [Int] -> [Token] -> [Token]
 layout mms@(m : ms) tts@(TIndent x       : ts) | n == m = TSpec (tokensLoc ts) ';' : layout    mms  ts
                                                | n <  m = TSpec (tokensLoc ts) '}' : layout     ms tts where {n = getCol x}
--