ref: 662c9dd76afa73d6f0ed6c9e235d4e9ad071edbf
parent: 3aaa147408b07558240fe10c8eb2e7b29ef45006
author: Lennart Augustsson <lennart.augustsson@epicgames.com>
date: Mon Aug 21 14:15:59 EDT 2023
Use string literals in combinator file.
--- a/comb/mhs.comb
+++ b/comb/mhs.comb
@@ -1,3 +1,3 @@
-v2.2
-656
-(($A :0 ((_484 _438) ((($S' ($C ((($C' ($S' _484)) ($C _2)) (($B ($B (_484 _512))) ((($C' ($S' $C)) ((($C' ($C' $C)) ((($C' ($C' ($C' $S'))) (($B ($B ($B $C))) ((($C' ($C' ($C' ($C' $C)))) ((($C' ($C' ($C' ($C' ($S' $B))))) ((($C' ($C' ($C' ($C' ($C' $S))))) ((($C' ($C' ($C' $B))) (($B ($B ($B ($C' $C)))) ((($C' ($C' ($C' ($C' ($C' $S'))))) (($B ($B ($B ($B ($B $C))))) ((($C' ($C' ($C' ($C' ($C' ($C' $B)))))) ((($C' ($C' ($C' ($S' ($C' $B))))) (($B ($B ($B ($B ($B $B))))) ((($C' ($C' ($C' ($C' $B)))) ((($S' $B) (($B $B) (($B $B) (($B $C') (($B ($S' _485)) ((($C' $B) (($B _569) (($B _502) ((($C' _606) _8) 0)))) (($B (_569 _505)) (($B (_516 (($O 116) (($O 111) (($O 112) (($O 32) (($O 108) (($O 101) (($O 118) (($O 101) (($O 108) (($O 32) (($O 100) (($O 101) (($O 102) (($O 110) (($O 115) (($O 58) (($O 32) $K))))))))))))))))))) _466)))))))) ((($S' $B) (($B $B) (($B ($C' $B)) (($B ($B $B)) (($B ($B _485)) ((($C' $B) (($B _569) (($B _502) ((($C' _606) _8) 1)))) (_501 ($T (($B ($B (_569 _505))) ((($C' $B) _516) (($B (_516 (($O 32) (($O 61) (($O 32) $K))))) _229))))))))))) ((($C' $B) ((($S' $C') (($B $C') (($B $C') _9))) ((($S' $B) (($B ($C' ($C' _485))) ((($C' $B) (($B $B) (($B _569) (($B _507) _11)))) (($B ($B (_516 _1))) (($B (($C' _516) _466)) (_516 (($O 10) $K))))))) (($B ($B (_484 _512))) ((($C' $B) (($B $B) (($B _569) (($B _502) ((($C' _606) _8) 0))))) (($B ($B (_569 _505))) (($B ($B (_516 (($O 102) (($O 105) (($O 110) (($O 97) (($O 108) (($O 32) (($O 112) (($O 97) (($O 115) (($O 115) (($O 32) (($O 32) (($O 32) (($O 32) (($O 32) (($O 32) (($O 32) (($O 32) (($O 32) (($O 32) (($O 32) (($O 32) $K))))))))))))))))))))))))) ((($C' ($C' _516)) (($B ($B (_479 6))) (($B ($B _466)) _600))) (($O 109) (($O 115) $K)))))))))) _3)))) _463))) (($B (($C' $C) (($B ($C _521)) _229))) (($C _534) (_549 0))))) (($B ($C $B)) (($B ($B ($C $B))) (($B ($B ($B $K))) (($B ($B ($B ($B (_516 (($O 40) (($O 40) (($O 36) (($O 65) (($O 32) (($O 58) $K))))))))))) (($B ($B (($C' $B) (($B _516) _466)))) (($B ($B ($B (_516 (($O 32) $K))))) ((($C' $B) (($B ($C' _516)) ($B _229))) (($B (_516 (($O 41) (($O 32) $K)))) (($C _516) (($O 41) $K))))))))))))) (($B $Y) ((($C' ($C' $S)) ((($C' ($C' $S)) ((($C' $B) $P) ((($S' ($C' $B)) ($B _206)) $I))) (($B $K) $K))) $K))))) $T)) (($B (($S' _569) (($B _566) (($B (_569 _614)) (($B (_516 (($O 109) (($O 97) (($O 105) (($O 110) (($O 58) (($O 32) (($O 102) (($O 105) (($O 110) (($O 100) (($O 73) (($O 100) (($O 101) (($O 110) (($O 116) (($O 58) (($O 32) $K))))))))))))))))))) _298))))) ($C _456)))) (($B ($B _460)) (($B (($C' _518) (($B $T) (($B ($C $B)) (($B ($B ($B $K))) ((($C' ($C' ($C' $O))) ($B ($C $P))) $K)))))) (($C _534) (_549 0)))))) (($B (_569 _205)) (($B (_516 (($O 95) $K))) _466))))) ($T $A))) ($T $K))) (($B $Y) $K)))))) (($S (($S ((($S' _7) (($B _533) (_520 (_477 (($O 45) (($O 118) $K)))))) ((_548 _477) (($O 45) (($O 114) $K))))) (($B (_514 (($O 46) $K))) (($B _568) (_519 ((_538 _591) (($O 45) (($O 105) $K)))))))) (($B (_569 _543)) ((($C' _516) (($B _568) (_519 ((_538 _591) (($O 45) (($O 111) $K)))))) (($O (($O 111) (($O 117) (($O 116) (($O 46) (($O 99) (($O 111) (($O 109) (($O 98) $K))))))))) $K))))) (($B (($S (($C ((($C' _602) _533) 1)) (_614 (($O 85) (($O 115) (($O 97) (($O 103) (($O 101) (($O 58) (($O 32) (($O 117) (($O 104) (($O 115) (($O 32) (($O 91) (($O 45) (($O 118) (($O 93) (($O 32) (($O 91) (($O 45) (($O 114) (($O 93) (($O 32) (($O 91) (($O 45) (($O 105) (($O 80) (($O 65) (($O 84) (($O 72) (($O 93) (($O 32) (($O 91) (($O 45) (($O 111) (($O 70) (($O 73) (($O 76) (($O 69) (($O 93) (($O 32) (($O 77) (($O 111) (($O 100) (($O 117) (($O 108) (($O 101) (($O 78) (($O 97) (($O 109) (($O 101) $K)))))))))))))))))))))))))))))))))))))))))))))))))))) _543)) (_520 ((_570 _611) ((_570 (_477 (($O 45) $K))) (_531 1)))))))) (($A :1 (($O 118) (($O 50) (($O 46) (($O 50) (($O 10) $K)))))) (($A :2 ((($S' ($S' _484)) _16) (($B ($B ($B (_484 _512)))) ((($C' ($C' $B)) (($B ($B ($C' (($S' _485) (($B (_569 _503)) (($B (_569 (_532 1000000))) _38)))))) (($B ($B ($B ($B (_484 _512))))) ((($C' $B) (($B ($C' $B)) (($B ($B ($C' _485))) ((($
\ No newline at end of file
+v3.0
+658
+(($A :0 ((_486 _440) ((($S' ($C ((($C' ($S' _486)) ($C _2)) (($B ($B (_486 _514))) ((($C' ($S' $C)) ((($C' ($C' $C)) ((($C' ($C' ($C' $S'))) (($B ($B ($B $C))) ((($C' ($C' ($C' ($C' $C)))) ((($C' ($C' ($C' ($C' ($S' $B))))) ((($C' ($C' ($C' ($C' ($C' $S))))) ((($C' ($C' ($C' $B))) (($B ($B ($B ($C' $C)))) ((($C' ($C' ($C' ($C' ($C' $S'))))) (($B ($B ($B ($B ($B $C))))) ((($C' ($C' ($C' ($C' ($C' ($C' $B)))))) ((($C' ($C' ($C' ($S' ($C' $B))))) (($B ($B ($B ($B ($B $B))))) ((($C' ($C' ($C' ($C' $B)))) ((($S' $B) (($B $B) (($B $B) (($B $C') (($B ($S' _487)) ((($C' $B) (($B _571) (($B _504) ((($C' _608) _8) 0)))) (($B (_571 _507)) (($B (_518 "top level defns: ")) _468)))))))) ((($S' $B) (($B $B) (($B ($C' $B)) (($B ($B $B)) (($B ($B _487)) ((($C' $B) (($B _571) (($B _504) ((($C' _608) _8) 1)))) (_503 ($T (($B ($B (_571 _507))) ((($C' $B) _518) (($B (_518 " = ")) _229))))))))))) ((($C' $B) ((($S' $C') (($B $C') (($B $C') _9))) ((($S' $B) (($B ($C' ($C' _487))) ((($C' $B) (($B $B) (($B _571) (($B _509) _11)))) (($B ($B (_518 _1))) (($B (($C' _518) _468)) (_518 (($O 10) $K))))))) (($B ($B (_486 _514))) ((($C' $B) (($B $B) (($B _571) (($B _504) ((($C' _608) _8) 0))))) (($B ($B (_571 _507))) (($B ($B (_518 "final pass "))) ((($C' ($C' _518)) (($B ($B (_481 6))) (($B ($B _468)) _602))) "ms")))))))) _3)))) _465))) (($B (($C' $C) (($B ($C _523)) _229))) (($C _536) (_551 0))))) (($B ($C $B)) (($B ($B ($C $B))) (($B ($B ($B $K))) (($B ($B ($B ($B (_518 "(($A :"))))) (($B ($B (($C' $B) (($B _518) _468)))) (($B ($B ($B (_518 (($O 32) $K))))) ((($C' $B) (($B ($C' _518)) ($B _229))) (($B (_518 ") ")) (($C _518) (($O 41) $K))))))))))))) (($B $Y) ((($C' ($C' $S)) ((($C' ($C' $S)) ((($C' $B) $P) ((($S' ($C' $B)) ($B _206)) $I))) (($B $K) $K))) $K))))) $T)) (($B (($S' _571) (($B _568) (($B (_571 _616)) (($B (_518 "main: findIdent: ")) _300))))) ($C _458)))) (($B ($B _462)) (($B (($C' _520) (($B $T) (($B ($C $B)) (($B ($B ($B $K))) ((($C' ($C' ($C' $O))) ($B ($C $P))) $K)))))) (($C _536) (_551 0)))))) (($B (_571 _205)) (($B (_518 (($O 95) $K))) _468))))) ($T $A))) ($T $K))) (($B $Y) $K)))))) (($S (($S ((($S' _7) (($B _535) (_522 (_479 "-v")))) ((_550 _479) "-r"))) (($B (_516 (($O 46) $K))) (($B _570) (_521 ((_540 _593) "-i")))))) (($B (_571 _545)) ((($C' _518) (($B _570) (_521 ((_540 _593) "-o")))) (($O "out.comb") $K))))) (($B (($S (($C ((($C' _604) _535) 1)) (_616 "Usage: uhs [-v] [-r] [-iPATH] [-oFILE] ModuleName"))) _545)) (_522 ((_572 _613) ((_572 (_479 (($O 45) $K))) (_533 1)))))))) (($A :1 "v3.0\10&") (($A :2 ((($S' ($S' _486)) _16) (($B ($B ($B (_486 _514)))) ((($C' ($C' $B)) (($B ($B ($C' (($S' _487) (($B (_571 _505)) (($B (_571 (_534 1000000))) _38)))))) (($B ($B ($B ($B (_486 _514))))) ((($C' $B) (($B ($C' $B)) (($B ($B ($C' _487))) ((($C' $B) (($B $B) (($B _571) (($B _504) ((($C' _608) _8) 0))))) (($B ($B (_571 _507))) (($B ($B (_518 "combinator conversion "))) ((($C' ($C' _518)) (($B ($B (_481 6))) (($B ($B _468)) _602))) "ms"))))))) (($B ($B _488)) (($B $P) (($C _302) "main"))))))) (_520 ($T ((($C' ($C' $O)) ((($C' $B) $P) _232)) $K))))))) (($A :3 ($T (($C ((($C' $C') (($B ($S' ($B (_571 _6)))) ($C $C))) (($B ($B $Y)) (($B ($B ($B _199))) (($C' ($C' _520)) (($B ($B $T)) ((($C' ($C' ($C' ($C' $O)))) (($B ($B (($C' $B) $P))) ($B _4))) $K))))))) (($B (($S' _571) (($B _568) (($B (_571 _616)) (_518 "not found "))))) ($C _200))))) (($A :4 ((($C' $C) ((($S' $C) ((($C' ($C' $S')) (($S $P) ((($S' ($C' $B)) (($B ($B _6)) _4)) _4))) (($B $K) $K))) ((($C' ($C' $C)) (($B (($C' $C) (($B ($P _6)) $K))) ((($C' $B) _4) _231))) (($B (_571 (_568 (_616 "primlookup")))) (($C (_554 _479)) _5))))) (_616 "trans: impossible"))) (($A :5 (($O (($P (($O 66) $K)) $B)) (($O (($P (($O 79) $K)) $O)) (($O (($P (($O 75) $K)) $K)) (($O (($P "C'") $C')) (($O (($P (($O 67) $K)) $C)) (($O (($P (($O 65) $K)) $A)) (($O (($P "S'") $S')) (($O (($P (($O 80) $K)) $P)) (($O (($P (($O 73) $K)) $I)) (($O (($P (($O 83) $K)) $S)) (($O (($P (($O 84) $K)) $T)) (($O (($P (($O 89) $K)) $Y)) (($O (($P "B'") $B')) (($O (($P (($O 43) $K)) $+)) (($O (($P (($O 45) $K)) $-))
\ No newline at end of file
--- a/src/Compat.hs
+++ b/src/Compat.hs
@@ -17,6 +17,9 @@
neChar :: Char -> Char -> Bool
neChar = (/=)
+ltChar :: Char -> Char -> Bool
+ltChar = (<)
+
eqString :: String -> String -> Bool
eqString = (==)
--- a/src/MicroHs/Desugar.hs
+++ b/src/MicroHs/Desugar.hs
@@ -75,7 +75,7 @@
EApp f a -> App (dsExpr f) (dsExpr a)
ELam xs e -> dsLam xs e
ELit (LChar c) -> Lit (LInt (ord c))
- ELit (LStr cs) -> dsExpr $ EList $ map (ELit . LChar) cs
+-- ELit (LStr cs) -> dsExpr $ EList $ map (ELit . LChar) cs
ELit l -> Lit l
ECase e as -> dsCase e as
-- For now, just sequential bindings; each recursive
--- a/src/MicroHs/Exp.hs
+++ b/src/MicroHs/Exp.hs
@@ -5,9 +5,11 @@
substExp,
Exp(..), showExp, toStringP,
PrimOp,
+ encodeString,
app2, cCons, cNil, cFlip
) where
import Prelude
+import Data.Char
import Data.List
import MicroHs.Parse
--Ximport Compat
@@ -117,9 +119,29 @@
toStringP ae =
case ae of
Var x -> x
+ Lit (LStr s) ->
+ -- Encode very short string directly as combinators.
+ if length s > 1 then
+ quoteString s
+ else
+ toStringP (encodeString s)
Lit l -> showLit l
Lam x e -> "(\\" ++ x ++ " " ++ toStringP e ++ ")"
App f a -> "(" ++ toStringP f ++ " " ++ toStringP a ++ ")"+
+quoteString :: String -> String
+quoteString s =
+ let
+ char c =
+ if eqChar c '"' || eqChar c '\\' || ltChar c ' ' || ltChar '~' c then
+ '\\' : showInt (ord c) ++ ['&']
+ else
+ [c]
+ in '"' : concatMap char s ++ ['"']
+
+encodeString :: String -> Exp
+encodeString [] = cNil
+encodeString (c:cs) = app2 cCons (Lit (LInt (ord c))) (encodeString cs)
compileOpt :: Exp -> Exp
compileOpt = improveT . compileExp
--- a/src/MicroHs/Main.hs
+++ b/src/MicroHs/Main.hs
@@ -63,7 +63,7 @@
putStrLn $ "final pass " ++ padLeft 6 (showInt (t2-t1)) ++ "ms"
version :: String
-version = "v2.2\n"
+version = "v3.0\n"
type Program = (Ident, [LDef])
--- a/src/MicroHs/Translate.hs
+++ b/src/MicroHs/Translate.hs
@@ -30,6 +30,7 @@
Var n -> r n
App f a -> unsafeCoerce (trans r f) (trans r a)
Lit (LInt i) -> unsafeCoerce i
+ Lit (LStr s) -> trans r (encodeString s)
Lit (LPrim p) -> fromMaybe (error "primlookup") $ lookupBy eqString p primTable
_ -> error "trans: impossible"
--- a/src/runtime/eval.c
+++ b/src/runtime/eval.c
@@ -55,7 +55,7 @@
#define FASTTAGS 1
#define UNIONPTR 1
-#define VERSION "v2.2\n"
+#define VERSION "v3.0\n"
#define HEAP_CELLS 100000
#define STACK_SIZE 10000
@@ -64,11 +64,12 @@
enum node_tag { T_FREE, T_IND, T_AP, T_INT, T_HDL, T_S, T_K, T_I, T_B, T_C, /* 0 - 9 */T_A, T_Y, T_SS, T_BB, T_CC, T_P, T_O, T_T, T_ADD, T_SUB, T_MUL, /* 10 - 20 */
- T_QUOT, T_REM, T_SUBR, T_EQ, T_NE, T_LT, T_LE, T_GT, T_GE, T_ERROR, /* 21-30 */
- T_IO_BIND, T_IO_THEN, T_IO_RETURN, T_IO_GETCHAR, T_IO_PUTCHAR, /* 31-35 */
- T_IO_SERIALIZE, T_IO_DESERIALIZE, T_IO_OPEN, T_IO_CLOSE, T_IO_ISNULLHANDLE, /* 36-40 */
- T_IO_STDIN, T_IO_STDOUT, T_IO_STDERR, T_IO_GETARGS, T_IO_PERFORMIO, /* 41-45 */
+ T_QUOT, T_REM, T_SUBR, T_EQ, T_NE, T_LT, T_LE, T_GT, T_GE, T_ERROR, /* 21 - 30 */
+ T_IO_BIND, T_IO_THEN, T_IO_RETURN, T_IO_GETCHAR, T_IO_PUTCHAR, /* 31 - 35 */
+ T_IO_SERIALIZE, T_IO_DESERIALIZE, T_IO_OPEN, T_IO_CLOSE, T_IO_ISNULLHANDLE, /* 36 - 40 */
+ T_IO_STDIN, T_IO_STDOUT, T_IO_STDERR, T_IO_GETARGS, T_IO_PERFORMIO, /* 41 - 45 */
T_IO_GETTIMEMILLI, T_IO_PRINT, /* 46 - 47 */
+ T_STR, /* 48 */
T_LAST_TAG,
};
@@ -82,6 +83,7 @@
union {value_t value;
FILE *file;
+ const char *string;
struct {struct node *fun;
struct node *arg;
@@ -117,6 +119,7 @@
struct node *uuarg;
value_t uuvalue;
FILE *uufile;
+ const char *uustring;
} uarg;
} node;
typedef struct node* NODEPTR;
@@ -128,6 +131,7 @@
#define SETVALUE(p,v) (p)->uarg.uuvalue = v
#define FUN(p) (p)->ufun.uufun
#define ARG(p) (p)->uarg.uuarg
+#define STR(p) (p)->uarg.uustring
#define INDIR(p) ARG(p)
#define HANDLE(p) (p)->uarg.uufile
#define NODE_SIZE sizeof(node)
@@ -565,6 +569,14 @@
return i;
}
+NODEPTR
+mkStrNode(const char *str)
+{+ NODEPTR n = alloc_node(T_STR);
+ STR(n) = str;
+ return n;
+}
+
/* Table of labelled nodes for sharing during parsing. */
struct shared_entry {uint64_t label;
@@ -677,6 +689,31 @@
r = parse(f);
INDIR(*nodep) = r;
return r;
+ case '"' :
+ /* Everything up to the next " is a string.
+ * Special characters are encoded as \NNN&,
+ * where NNN is the decimal value of the character */
+ /* XXX assume there are no NULs in the string, and all fit in a char */
+ /* XXX allocation is a hack */
+ {+ char *buffer = malloc(10000);
+ char *p = buffer;
+ for(;;) {+ c = getc(f);
+ if (c == '"')
+ break;
+ if (c == '\\') {+ *p++ = (char)parse_int(f);
+ if (!gobble(f, '&'))
+ ERR("parse string");+ } else {+ *p++ = c;
+ }
+ }
+ *p++ = 0;
+ r = mkStrNode(realloc(buffer, p - buffer));
+ return r;
+ }
default:
fprintf(stderr, "parse '%c'\n", c);
ERR("parse default");@@ -804,6 +841,19 @@
fputc(')', f);break;
case T_INT: fprintf(f, "%"PRIu64, GETVALUE(n)); break;
+ case T_STR:
+ {+ const char *p = STR(n);
+ int c;
+ fputc('"', f);+ while ((c = *p++)) {+ if (c == '"' || c == '\\' || c < ' ' || c > '~') {+ fprintf(f, "\\%d&", c);
+ }
+ }
+ fputc('"', f);+ break;
+ }
case T_HDL:
if (HANDLE(n) == stdin)
fprintf(f, "$IO.stdin");
@@ -885,6 +935,43 @@
fprintf(f, "\n");
}
+NODEPTR
+mkNil(void)
+{+ return combFalse;
+}
+
+NODEPTR
+mkCons(NODEPTR x, NODEPTR xs)
+{+ return new_ap(new_ap(combCons, x), xs);
+}
+
+size_t
+strNodes(size_t len)
+{+ /* Each character will need a CHAR node and a CONS node, a CONS uses 2 T_AP nodes */
+ len *= (1 + 2);
+ /* And each string will need a NIL */
+ len += 1;
+ return len;
+}
+
+/* Turn a C string into a combinator string */
+NODEPTR
+mkString(const char *str, size_t len)
+{+ NODEPTR n, nc;
+
+ n = mkNil();
+ for(size_t i = len; i > 0; i--) {+ nc = alloc_node(T_INT);
+ SETVALUE(nc, str[i-1]);
+ n = mkCons(nc, n);
+ }
+ return n;
+}
+
void eval(NODEPTR n);
/* Evaluate and skip indirections. */
@@ -1014,11 +1101,13 @@
enum node_tag tag = l < T_IO_BIND ? l : GETTAG(n);
switch (tag) {ind:
+ num_reductions++;
case T_IND:
n = INDIR(n);
TOP(0) = n;
break;
ap:
+ num_reductions++;
case T_AP:
n = FUN(n);
PUSH(n);
@@ -1026,6 +1115,11 @@
case T_INT:
case T_HDL:
RET;
+ case T_STR:
+ GCCHECK(strNodes(strlen(STR(n))));
+ x = mkString(STR(n), strlen(STR(n)));
+ SETIND(n, x);
+ goto ind;
case T_S: /* S f g x = f x (g x) */
CHECK(3);
GCCHECK(2);
@@ -1226,43 +1320,6 @@
ERR("eval tag");}
}
-}
-
-NODEPTR
-mkNil(void)
-{- return combFalse;
-}
-
-NODEPTR
-mkCons(NODEPTR x, NODEPTR xs)
-{- return new_ap(new_ap(combCons, x), xs);
-}
-
-size_t
-strNodes(size_t len)
-{- /* Each character will need a CHAR node and a CONS node, a CONS uses 2 T_AP nodes */
- len *= (1 + 2);
- /* And each string will need a NIL */
- len += 1;
- return len;
-}
-
-/* Turn a C string into a combinator string */
-NODEPTR
-mkString(const char *str, size_t len)
-{- NODEPTR n, nc;
-
- n = mkNil();
- for(size_t i = len; i > 0; i--) {- nc = alloc_node(T_INT);
- SETVALUE(nc, str[i-1]);
- n = mkCons(nc, n);
- }
- return n;
}
/* This is the interpreter for the IO monad operations. */
--
⑨