shithub: MicroHs

Download patch

ref: 662c9dd76afa73d6f0ed6c9e235d4e9ad071edbf
parent: 3aaa147408b07558240fe10c8eb2e7b29ef45006
author: Lennart Augustsson <lennart.augustsson@epicgames.com>
date: Mon Aug 21 14:15:59 EDT 2023

Use string literals in combinator file.

--- a/comb/mhs.comb
+++ b/comb/mhs.comb
@@ -1,3 +1,3 @@
-v2.2
-656
-(($A :0 ((_484 _438) ((($S' ($C ((($C' ($S' _484)) ($C _2)) (($B ($B (_484 _512))) ((($C' ($S' $C)) ((($C' ($C' $C)) ((($C' ($C' ($C' $S'))) (($B ($B ($B $C))) ((($C' ($C' ($C' ($C' $C)))) ((($C' ($C' ($C' ($C' ($S' $B))))) ((($C' ($C' ($C' ($C' ($C' $S))))) ((($C' ($C' ($C' $B))) (($B ($B ($B ($C' $C)))) ((($C' ($C' ($C' ($C' ($C' $S'))))) (($B ($B ($B ($B ($B $C))))) ((($C' ($C' ($C' ($C' ($C' ($C' $B)))))) ((($C' ($C' ($C' ($S' ($C' $B))))) (($B ($B ($B ($B ($B $B))))) ((($C' ($C' ($C' ($C' $B)))) ((($S' $B) (($B $B) (($B $B) (($B $C') (($B ($S' _485)) ((($C' $B) (($B _569) (($B _502) ((($C' _606) _8) 0)))) (($B (_569 _505)) (($B (_516 (($O 116) (($O 111) (($O 112) (($O 32) (($O 108) (($O 101) (($O 118) (($O 101) (($O 108) (($O 32) (($O 100) (($O 101) (($O 102) (($O 110) (($O 115) (($O 58) (($O 32) $K))))))))))))))))))) _466)))))))) ((($S' $B) (($B $B) (($B ($C' $B)) (($B ($B $B)) (($B ($B _485)) ((($C' $B) (($B _569) (($B _502) ((($C' _606) _8) 1)))) (_501 ($T (($B ($B (_569 _505))) ((($C' $B) _516) (($B (_516 (($O 32) (($O 61) (($O 32) $K))))) _229))))))))))) ((($C' $B) ((($S' $C') (($B $C') (($B $C') _9))) ((($S' $B) (($B ($C' ($C' _485))) ((($C' $B) (($B $B) (($B _569) (($B _507) _11)))) (($B ($B (_516 _1))) (($B (($C' _516) _466)) (_516 (($O 10) $K))))))) (($B ($B (_484 _512))) ((($C' $B) (($B $B) (($B _569) (($B _502) ((($C' _606) _8) 0))))) (($B ($B (_569 _505))) (($B ($B (_516 (($O 102) (($O 105) (($O 110) (($O 97) (($O 108) (($O 32) (($O 112) (($O 97) (($O 115) (($O 115) (($O 32) (($O 32) (($O 32) (($O 32) (($O 32) (($O 32) (($O 32) (($O 32) (($O 32) (($O 32) (($O 32) (($O 32) $K))))))))))))))))))))))))) ((($C' ($C' _516)) (($B ($B (_479 6))) (($B ($B _466)) _600))) (($O 109) (($O 115) $K)))))))))) _3)))) _463))) (($B (($C' $C) (($B ($C _521)) _229))) (($C _534) (_549 0))))) (($B ($C $B)) (($B ($B ($C $B))) (($B ($B ($B $K))) (($B ($B ($B ($B (_516 (($O 40) (($O 40) (($O 36) (($O 65) (($O 32) (($O 58) $K))))))))))) (($B ($B (($C' $B) (($B _516) _466)))) (($B ($B ($B (_516 (($O 32) $K))))) ((($C' $B) (($B ($C' _516)) ($B _229))) (($B (_516 (($O 41) (($O 32) $K)))) (($C _516) (($O 41) $K))))))))))))) (($B $Y) ((($C' ($C' $S)) ((($C' ($C' $S)) ((($C' $B) $P) ((($S' ($C' $B)) ($B _206)) $I))) (($B $K) $K))) $K))))) $T)) (($B (($S' _569) (($B _566) (($B (_569 _614)) (($B (_516 (($O 109) (($O 97) (($O 105) (($O 110) (($O 58) (($O 32) (($O 102) (($O 105) (($O 110) (($O 100) (($O 73) (($O 100) (($O 101) (($O 110) (($O 116) (($O 58) (($O 32) $K))))))))))))))))))) _298))))) ($C _456)))) (($B ($B _460)) (($B (($C' _518) (($B $T) (($B ($C $B)) (($B ($B ($B $K))) ((($C' ($C' ($C' $O))) ($B ($C $P))) $K)))))) (($C _534) (_549 0)))))) (($B (_569 _205)) (($B (_516 (($O 95) $K))) _466))))) ($T $A))) ($T $K))) (($B $Y) $K)))))) (($S (($S ((($S' _7) (($B _533) (_520 (_477 (($O 45) (($O 118) $K)))))) ((_548 _477) (($O 45) (($O 114) $K))))) (($B (_514 (($O 46) $K))) (($B _568) (_519 ((_538 _591) (($O 45) (($O 105) $K)))))))) (($B (_569 _543)) ((($C' _516) (($B _568) (_519 ((_538 _591) (($O 45) (($O 111) $K)))))) (($O (($O 111) (($O 117) (($O 116) (($O 46) (($O 99) (($O 111) (($O 109) (($O 98) $K))))))))) $K))))) (($B (($S (($C ((($C' _602) _533) 1)) (_614 (($O 85) (($O 115) (($O 97) (($O 103) (($O 101) (($O 58) (($O 32) (($O 117) (($O 104) (($O 115) (($O 32) (($O 91) (($O 45) (($O 118) (($O 93) (($O 32) (($O 91) (($O 45) (($O 114) (($O 93) (($O 32) (($O 91) (($O 45) (($O 105) (($O 80) (($O 65) (($O 84) (($O 72) (($O 93) (($O 32) (($O 91) (($O 45) (($O 111) (($O 70) (($O 73) (($O 76) (($O 69) (($O 93) (($O 32) (($O 77) (($O 111) (($O 100) (($O 117) (($O 108) (($O 101) (($O 78) (($O 97) (($O 109) (($O 101) $K)))))))))))))))))))))))))))))))))))))))))))))))))))) _543)) (_520 ((_570 _611) ((_570 (_477 (($O 45) $K))) (_531 1)))))))) (($A :1 (($O 118) (($O 50) (($O 46) (($O 50) (($O 10) $K)))))) (($A :2 ((($S' ($S' _484)) _16) (($B ($B ($B (_484 _512)))) ((($C' ($C' $B)) (($B ($B ($C' (($S' _485) (($B (_569 _503)) (($B (_569 (_532 1000000))) _38)))))) (($B ($B ($B ($B (_484 _512))))) ((($C' $B) (($B ($C' $B)) (($B ($B ($C' _485))) ((($
\ No newline at end of file
+v3.0
+658
+(($A :0 ((_486 _440) ((($S' ($C ((($C' ($S' _486)) ($C _2)) (($B ($B (_486 _514))) ((($C' ($S' $C)) ((($C' ($C' $C)) ((($C' ($C' ($C' $S'))) (($B ($B ($B $C))) ((($C' ($C' ($C' ($C' $C)))) ((($C' ($C' ($C' ($C' ($S' $B))))) ((($C' ($C' ($C' ($C' ($C' $S))))) ((($C' ($C' ($C' $B))) (($B ($B ($B ($C' $C)))) ((($C' ($C' ($C' ($C' ($C' $S'))))) (($B ($B ($B ($B ($B $C))))) ((($C' ($C' ($C' ($C' ($C' ($C' $B)))))) ((($C' ($C' ($C' ($S' ($C' $B))))) (($B ($B ($B ($B ($B $B))))) ((($C' ($C' ($C' ($C' $B)))) ((($S' $B) (($B $B) (($B $B) (($B $C') (($B ($S' _487)) ((($C' $B) (($B _571) (($B _504) ((($C' _608) _8) 0)))) (($B (_571 _507)) (($B (_518 "top level defns: ")) _468)))))))) ((($S' $B) (($B $B) (($B ($C' $B)) (($B ($B $B)) (($B ($B _487)) ((($C' $B) (($B _571) (($B _504) ((($C' _608) _8) 1)))) (_503 ($T (($B ($B (_571 _507))) ((($C' $B) _518) (($B (_518 " = ")) _229))))))))))) ((($C' $B) ((($S' $C') (($B $C') (($B $C') _9))) ((($S' $B) (($B ($C' ($C' _487))) ((($C' $B) (($B $B) (($B _571) (($B _509) _11)))) (($B ($B (_518 _1))) (($B (($C' _518) _468)) (_518 (($O 10) $K))))))) (($B ($B (_486 _514))) ((($C' $B) (($B $B) (($B _571) (($B _504) ((($C' _608) _8) 0))))) (($B ($B (_571 _507))) (($B ($B (_518 "final pass            "))) ((($C' ($C' _518)) (($B ($B (_481 6))) (($B ($B _468)) _602))) "ms")))))))) _3)))) _465))) (($B (($C' $C) (($B ($C _523)) _229))) (($C _536) (_551 0))))) (($B ($C $B)) (($B ($B ($C $B))) (($B ($B ($B $K))) (($B ($B ($B ($B (_518 "(($A :"))))) (($B ($B (($C' $B) (($B _518) _468)))) (($B ($B ($B (_518 (($O 32) $K))))) ((($C' $B) (($B ($C' _518)) ($B _229))) (($B (_518 ") ")) (($C _518) (($O 41) $K))))))))))))) (($B $Y) ((($C' ($C' $S)) ((($C' ($C' $S)) ((($C' $B) $P) ((($S' ($C' $B)) ($B _206)) $I))) (($B $K) $K))) $K))))) $T)) (($B (($S' _571) (($B _568) (($B (_571 _616)) (($B (_518 "main: findIdent: ")) _300))))) ($C _458)))) (($B ($B _462)) (($B (($C' _520) (($B $T) (($B ($C $B)) (($B ($B ($B $K))) ((($C' ($C' ($C' $O))) ($B ($C $P))) $K)))))) (($C _536) (_551 0)))))) (($B (_571 _205)) (($B (_518 (($O 95) $K))) _468))))) ($T $A))) ($T $K))) (($B $Y) $K)))))) (($S (($S ((($S' _7) (($B _535) (_522 (_479 "-v")))) ((_550 _479) "-r"))) (($B (_516 (($O 46) $K))) (($B _570) (_521 ((_540 _593) "-i")))))) (($B (_571 _545)) ((($C' _518) (($B _570) (_521 ((_540 _593) "-o")))) (($O "out.comb") $K))))) (($B (($S (($C ((($C' _604) _535) 1)) (_616 "Usage: uhs [-v] [-r] [-iPATH] [-oFILE] ModuleName"))) _545)) (_522 ((_572 _613) ((_572 (_479 (($O 45) $K))) (_533 1)))))))) (($A :1 "v3.0\10&") (($A :2 ((($S' ($S' _486)) _16) (($B ($B ($B (_486 _514)))) ((($C' ($C' $B)) (($B ($B ($C' (($S' _487) (($B (_571 _505)) (($B (_571 (_534 1000000))) _38)))))) (($B ($B ($B ($B (_486 _514))))) ((($C' $B) (($B ($C' $B)) (($B ($B ($C' _487))) ((($C' $B) (($B $B) (($B _571) (($B _504) ((($C' _608) _8) 0))))) (($B ($B (_571 _507))) (($B ($B (_518 "combinator conversion "))) ((($C' ($C' _518)) (($B ($B (_481 6))) (($B ($B _468)) _602))) "ms"))))))) (($B ($B _488)) (($B $P) (($C _302) "main"))))))) (_520 ($T ((($C' ($C' $O)) ((($C' $B) $P) _232)) $K))))))) (($A :3 ($T (($C ((($C' $C') (($B ($S' ($B (_571 _6)))) ($C $C))) (($B ($B $Y)) (($B ($B ($B _199))) (($C' ($C' _520)) (($B ($B $T)) ((($C' ($C' ($C' ($C' $O)))) (($B ($B (($C' $B) $P))) ($B _4))) $K))))))) (($B (($S' _571) (($B _568) (($B (_571 _616)) (_518 "not found "))))) ($C _200))))) (($A :4 ((($C' $C) ((($S' $C) ((($C' ($C' $S')) (($S $P) ((($S' ($C' $B)) (($B ($B _6)) _4)) _4))) (($B $K) $K))) ((($C' ($C' $C)) (($B (($C' $C) (($B ($P _6)) $K))) ((($C' $B) _4) _231))) (($B (_571 (_568 (_616 "primlookup")))) (($C (_554 _479)) _5))))) (_616 "trans: impossible"))) (($A :5 (($O (($P (($O 66) $K)) $B)) (($O (($P (($O 79) $K)) $O)) (($O (($P (($O 75) $K)) $K)) (($O (($P "C'") $C')) (($O (($P (($O 67) $K)) $C)) (($O (($P (($O 65) $K)) $A)) (($O (($P "S'") $S')) (($O (($P (($O 80) $K)) $P)) (($O (($P (($O 73) $K)) $I)) (($O (($P (($O 83) $K)) $S)) (($O (($P (($O 84) $K)) $T)) (($O (($P (($O 89) $K)) $Y)) (($O (($P "B'") $B')) (($O (($P (($O 43) $K)) $+)) (($O (($P (($O 45) $K)) $-))
\ No newline at end of file
--- a/src/Compat.hs
+++ b/src/Compat.hs
@@ -17,6 +17,9 @@
 neChar :: Char -> Char -> Bool
 neChar = (/=)
 
+ltChar :: Char -> Char -> Bool
+ltChar = (<)
+
 eqString :: String -> String -> Bool
 eqString = (==)
 
--- a/src/MicroHs/Desugar.hs
+++ b/src/MicroHs/Desugar.hs
@@ -75,7 +75,7 @@
     EApp f a -> App (dsExpr f) (dsExpr a)
     ELam xs e -> dsLam xs e
     ELit (LChar c) -> Lit (LInt (ord c))
-    ELit (LStr cs) -> dsExpr $ EList $ map (ELit . LChar) cs
+--    ELit (LStr cs) -> dsExpr $ EList $ map (ELit . LChar) cs
     ELit l -> Lit l
     ECase e as -> dsCase e as
 -- For now, just sequential bindings; each recursive
--- a/src/MicroHs/Exp.hs
+++ b/src/MicroHs/Exp.hs
@@ -5,9 +5,11 @@
   substExp,
   Exp(..), showExp, toStringP,
   PrimOp,
+  encodeString,
   app2, cCons, cNil, cFlip
   ) where
 import Prelude
+import Data.Char
 import Data.List
 import MicroHs.Parse
 --Ximport Compat
@@ -117,9 +119,29 @@
 toStringP ae =
   case ae of
     Var x   -> x
+    Lit (LStr s) ->
+      -- Encode very short string directly as combinators.
+      if length s > 1 then
+        quoteString s
+      else
+        toStringP (encodeString s)
     Lit l   -> showLit l
     Lam x e -> "(\\" ++ x ++ " " ++ toStringP e ++ ")"
     App f a -> "(" ++ toStringP f ++ " " ++ toStringP a ++ ")"
+
+quoteString :: String -> String
+quoteString s =
+  let
+    char c =
+      if eqChar c '"' || eqChar c '\\' || ltChar c ' ' || ltChar '~' c then
+        '\\' : showInt (ord c) ++ ['&']
+      else
+        [c]
+  in '"' : concatMap char s ++ ['"']
+
+encodeString :: String -> Exp
+encodeString [] = cNil
+encodeString (c:cs) = app2 cCons (Lit (LInt (ord c))) (encodeString cs)
 
 compileOpt :: Exp -> Exp
 compileOpt = improveT . compileExp
--- a/src/MicroHs/Main.hs
+++ b/src/MicroHs/Main.hs
@@ -63,7 +63,7 @@
       putStrLn $ "final pass            " ++ padLeft 6 (showInt (t2-t1)) ++ "ms"
 
 version :: String
-version = "v2.2\n"
+version = "v3.0\n"
 
 type Program = (Ident, [LDef])
 
--- a/src/MicroHs/Translate.hs
+++ b/src/MicroHs/Translate.hs
@@ -30,6 +30,7 @@
     Var n -> r n
     App f a -> unsafeCoerce (trans r f) (trans r a)
     Lit (LInt i) -> unsafeCoerce i
+    Lit (LStr s) -> trans r (encodeString s)
     Lit (LPrim p) -> fromMaybe (error "primlookup") $ lookupBy eqString p primTable
     _ -> error "trans: impossible"
 
--- a/src/runtime/eval.c
+++ b/src/runtime/eval.c
@@ -55,7 +55,7 @@
 #define FASTTAGS 1
 #define UNIONPTR 1
 
-#define VERSION "v2.2\n"
+#define VERSION "v3.0\n"
 
 #define HEAP_CELLS 100000
 #define STACK_SIZE 10000
@@ -64,11 +64,12 @@
 
 enum node_tag { T_FREE, T_IND, T_AP, T_INT, T_HDL, T_S, T_K, T_I, T_B, T_C, /* 0 - 9 */
                 T_A, T_Y, T_SS, T_BB, T_CC, T_P, T_O, T_T, T_ADD, T_SUB, T_MUL,  /* 10 - 20 */
-                T_QUOT, T_REM, T_SUBR, T_EQ, T_NE, T_LT, T_LE, T_GT, T_GE, T_ERROR, /* 21-30 */
-                T_IO_BIND, T_IO_THEN, T_IO_RETURN, T_IO_GETCHAR, T_IO_PUTCHAR, /* 31-35 */
-                T_IO_SERIALIZE, T_IO_DESERIALIZE, T_IO_OPEN, T_IO_CLOSE, T_IO_ISNULLHANDLE, /* 36-40 */
-                T_IO_STDIN, T_IO_STDOUT, T_IO_STDERR, T_IO_GETARGS, T_IO_PERFORMIO, /* 41-45 */
+                T_QUOT, T_REM, T_SUBR, T_EQ, T_NE, T_LT, T_LE, T_GT, T_GE, T_ERROR, /* 21 - 30 */
+                T_IO_BIND, T_IO_THEN, T_IO_RETURN, T_IO_GETCHAR, T_IO_PUTCHAR, /* 31 - 35 */
+                T_IO_SERIALIZE, T_IO_DESERIALIZE, T_IO_OPEN, T_IO_CLOSE, T_IO_ISNULLHANDLE, /* 36 - 40 */
+                T_IO_STDIN, T_IO_STDOUT, T_IO_STDERR, T_IO_GETARGS, T_IO_PERFORMIO, /* 41 - 45 */
                 T_IO_GETTIMEMILLI, T_IO_PRINT, /* 46 - 47 */
+                T_STR,                         /* 48 */
                 T_LAST_TAG,
 };
 
@@ -82,6 +83,7 @@
   union {
     value_t value;
     FILE *file;
+    const char *string;
     struct {
       struct node *fun;
       struct node *arg;
@@ -117,6 +119,7 @@
     struct node *uuarg;
     value_t uuvalue;
     FILE *uufile;
+    const char *uustring;
   } uarg;
 } node;
 typedef struct node* NODEPTR;
@@ -128,6 +131,7 @@
 #define SETVALUE(p,v) (p)->uarg.uuvalue = v
 #define FUN(p) (p)->ufun.uufun
 #define ARG(p) (p)->uarg.uuarg
+#define STR(p) (p)->uarg.uustring
 #define INDIR(p) ARG(p)
 #define HANDLE(p) (p)->uarg.uufile
 #define NODE_SIZE sizeof(node)
@@ -565,6 +569,14 @@
   return i;
 }
 
+NODEPTR
+mkStrNode(const char *str)
+{
+  NODEPTR n = alloc_node(T_STR);
+  STR(n) = str;
+  return n;
+}
+
 /* Table of labelled nodes for sharing during parsing. */
 struct shared_entry {
   uint64_t label;
@@ -677,6 +689,31 @@
     r = parse(f);
     INDIR(*nodep) = r;
     return r;
+  case '"' :
+    /* Everything up to the next " is a string.
+     * Special characters are encoded as \NNN&,
+     * where NNN is the decimal value of the character */
+    /* XXX assume there are no NULs in the string, and all fit in a char */
+    /* XXX allocation is a hack */
+    {
+      char *buffer = malloc(10000);
+      char *p = buffer;
+      for(;;) {
+        c = getc(f);
+        if (c == '"')
+          break;
+        if (c == '\\') {
+          *p++ = (char)parse_int(f);
+          if (!gobble(f, '&'))
+            ERR("parse string");
+        } else {
+          *p++ = c;
+        }
+      }
+      *p++ = 0;
+      r = mkStrNode(realloc(buffer, p - buffer));
+      return r;
+    }
   default:
     fprintf(stderr, "parse '%c'\n", c);
     ERR("parse default");
@@ -804,6 +841,19 @@
     fputc(')', f);
     break;
   case T_INT: fprintf(f, "%"PRIu64, GETVALUE(n)); break;
+  case T_STR:
+    {
+      const char *p = STR(n);
+      int c;
+      fputc('"', f);
+      while ((c = *p++)) {
+        if (c == '"' || c == '\\' || c < ' ' || c > '~') {
+          fprintf(f, "\\%d&", c);
+        }
+      }
+      fputc('"', f);
+      break;
+    }
   case T_HDL:
     if (HANDLE(n) == stdin)
       fprintf(f, "$IO.stdin");
@@ -885,6 +935,43 @@
   fprintf(f, "\n");
 }
 
+NODEPTR
+mkNil(void)
+{
+  return combFalse;
+}
+
+NODEPTR
+mkCons(NODEPTR x, NODEPTR xs)
+{
+  return new_ap(new_ap(combCons, x), xs);
+}
+
+size_t
+strNodes(size_t len)
+{
+  /* Each character will need a CHAR node and a CONS node, a CONS uses 2 T_AP nodes */
+  len *= (1 + 2);
+  /* And each string will need a NIL */
+  len += 1;
+  return len;
+}
+
+/* Turn a C string into a combinator string */
+NODEPTR
+mkString(const char *str, size_t len)
+{
+  NODEPTR n, nc;
+
+  n = mkNil();
+  for(size_t i = len; i > 0; i--) {
+    nc = alloc_node(T_INT);
+    SETVALUE(nc, str[i-1]);
+    n = mkCons(nc, n);
+  }
+  return n;
+}
+
 void eval(NODEPTR n);
 
 /* Evaluate and skip indirections. */
@@ -1014,11 +1101,13 @@
     enum node_tag tag = l < T_IO_BIND ? l : GETTAG(n);
     switch (tag) {
     ind:
+    num_reductions++;
     case T_IND:
       n = INDIR(n);
       TOP(0) = n;
       break;
     ap:
+    num_reductions++;
     case T_AP:
       n = FUN(n);
       PUSH(n);
@@ -1026,6 +1115,11 @@
     case T_INT:
     case T_HDL:
       RET;
+    case T_STR:
+      GCCHECK(strNodes(strlen(STR(n))));
+      x = mkString(STR(n), strlen(STR(n)));
+      SETIND(n, x);
+      goto ind;
     case T_S:                     /* S f g x = f x (g x) */
       CHECK(3);
       GCCHECK(2);
@@ -1226,43 +1320,6 @@
       ERR("eval tag");
     }
   }
-}
-
-NODEPTR
-mkNil(void)
-{
-  return combFalse;
-}
-
-NODEPTR
-mkCons(NODEPTR x, NODEPTR xs)
-{
-  return new_ap(new_ap(combCons, x), xs);
-}
-
-size_t
-strNodes(size_t len)
-{
-  /* Each character will need a CHAR node and a CONS node, a CONS uses 2 T_AP nodes */
-  len *= (1 + 2);
-  /* And each string will need a NIL */
-  len += 1;
-  return len;
-}
-
-/* Turn a C string into a combinator string */
-NODEPTR
-mkString(const char *str, size_t len)
-{
-  NODEPTR n, nc;
-
-  n = mkNil();
-  for(size_t i = len; i > 0; i--) {
-    nc = alloc_node(T_INT);
-    SETVALUE(nc, str[i-1]);
-    n = mkCons(nc, n);
-  }
-  return n;
 }
 
 /* This is the interpreter for the IO monad operations. */
--