shithub: kwa

Download patch

ref: f01859c469dac3ed01d36a656cce03f0b63abae1
parent: 3923757ccbde3af0f253824bc9cf9a99127178fb
author: qwx <qwx@sciops.net>
date: Mon Sep 29 06:34:26 EDT 2025

test: add more tests and missing files, use only part of ucd

--- a/test/T.-f-f
+++ b/test/T.-f-f
@@ -8,15 +8,17 @@
 echo xxx | $awk -f foo1 -f foo2 >foo3
 diff foo foo3 || echo 'BAD: T.-f-f multiple -fs'
 
-echo '/a/' | $awk -f - /lib/ucd/UnicodeData.txt >foo1
-$awk '/a/' /lib/ucd/UnicodeData.txt >foo2
+sed 1000q /lib/ucd/UnicodeData.txt >foo.txt
+
+echo '/a/' | $awk -f - foo.txt >foo1
+$awk '/a/' foo.txt >foo2
 diff foo1 foo2 || echo 'BAD: T.-f-f  -f -'
 
-cp /lib/ucd/UnicodeData.txt foo1
+cp foo.txt foo1
 echo '/./ {' >foo2
 echo 'print' >foo3
 echo '}' >foo4
-$awk -f foo2 -f foo3 -f foo4 /lib/ucd/UnicodeData.txt >foo5
+$awk -f foo2 -f foo3 -f foo4 foo.txt >foo5
 diff foo1 foo5 || echo 'BAD: T.-f-f 3 files'
 
 echo '/./ {' >foo2
@@ -26,5 +28,5 @@
 
 
 ]' >foo4
-$awk -f foo2 -f foo3 -f foo4 /lib/ucd/UnicodeData.txt >foo5 >[2]foo6
+$awk -f foo2 -f foo3 -f foo4 foo.txt >foo5 >[2]foo6
 grep 'syntax error.*at foo4:' foo6 >/dev/null >[2=1] || echo 'BAD: T.-f-f source file name'
--- a/test/T.close
+++ b/test/T.close
@@ -1,9 +1,11 @@
 #!/bin/rc
 echo T.close: test close built-in
 
+sed 1000q /lib/ucd/UnicodeData.txt >foo.txt
+
 rm -f foo
-$awk '{ print >>"foo"; close("foo") }' /lib/ucd/UnicodeData.txt
-diff /lib/ucd/UnicodeData.txt foo || echo 'BAD: T.close (1)'
+$awk '{ print >>"foo"; close("foo") }' foo.txt
+diff foo.txt foo || echo 'BAD: T.close (1)'
 
 ls -l >foo
 tail -1 foo >foo1
@@ -12,7 +14,7 @@
 
 echo 0 >foo1
 $awk '	# non-accessible file
-  BEGIN { getline <"/lib/ucd/UnicodeData.txt"; print close("/lib/ucd/UnicodeData.txt"); }
+  BEGIN { getline <"foo.txt"; print close("foo.txt"); }
 ' >foo2
 diff foo1 foo2 || echo 'BAD: T.close (3)'
 
--- a/test/T.clv
+++ b/test/T.clv
@@ -134,11 +134,13 @@
 $awk -f foo0 '-vx=123' '-vy=abc' '-vz1=10.99' /dev/null 'x=4567' /dev/null >foo2
 diff foo1 foo2 || echo 'BAD: T.clv (x=15a)'
 
+sed 1000q /lib/ucd/UnicodeData.txt >foo.txt
+
 echo 'BEGIN { print x, y, z1 }
 NR==1 { print x }' >foo0
 echo '123 abc 10.99
 4567' >foo1
-$awk -v 'x=123' -v 'y=abc' -v 'z1=10.99' -f foo0 'x=4567' /lib/ucd/UnicodeData.txt >foo2
+$awk -v 'x=123' -v 'y=abc' -v 'z1=10.99' -f foo0 'x=4567' foo.txt >foo2
 diff foo1 foo2 || echo 'BAD: T.clv (x=16)'
 
 echo 'BEGIN { print x, y, z1 }
@@ -145,11 +147,9 @@
 NR==1 { print x }' >foo0
 echo '123 abc 10.99
 4567' >foo1
-$awk '-vx=123' '-vy=abc' '-vz1=10.99' -f foo0 'x=4567' /lib/ucd/UnicodeData.txt >foo2
+$awk '-vx=123' '-vy=abc' '-vz1=10.99' -f foo0 'x=4567' foo.txt >foo2
 diff foo1 foo2 || echo 'BAD: T.clv (x=16a)'
 
-
-
 # special chars in commandline assigned value;
 # have to use local echo to avoid quoting problems.
 
@@ -166,10 +166,6 @@
 $awk 'BEGIN { printf("a%c%c%cz\n", "\b", "\r", "\f") }' >foo1 
 echo 'hello' | $awk '{print x}' 'x=a\b\r\fz' >foo2
 diff foo1 foo2 || echo 'BAD: T.clv (x=19)'
-
-
-### newer -v tests
-
 
 $awk -vx 'BEGIN {print x}' >foo >[2=1]
 grep 'invalid -v option argument: x' foo >/dev/null || echo 'BAD: T.clv (x=20)'
--- /dev/null
+++ b/test/T.getline
@@ -1,0 +1,99 @@
+#!/bin/rc
+echo T.getline: test getline function
+# bare getline in BEGIN: every stdin line lands in $0 and is printed back
+who >foo1
+cat foo1 | $awk '
+BEGIN {
+	while (getline)
+		print
+	exit
+}
+' >foo
+cmp -s foo1 foo || echo 'BAD: T.getline (bare getline)'
+# getline var: every stdin line lands in xxx instead of $0
+who >foo1
+cat foo1 | $awk '
+BEGIN {
+	while (getline xxx)
+		print xxx
+	exit
+}
+' >foo
+cmp -s foo1 foo || echo 'BAD: T.getline (getline xxx)'
+# the file tests below copy the first 1000 lines of the Unicode data
+sed 1000q /lib/ucd/UnicodeData.txt >foo.txt
+# getline <file with a literal file name
+$awk '
+BEGIN {
+	while (getline <"foo.txt")
+		print
+	exit
+}
+' >foo
+cmp -s foo.txt foo || echo 'BAD: T.getline (getline <file)'
+# getline <"-" must read standard input
+cat foo.txt | $awk '
+BEGIN {
+	while (getline <"-")	# stdin
+		print
+	exit
+}
+' >foo
+cmp -s foo.txt foo || echo 'BAD: T.getline (getline <"-")'
+# file name taken from ARGV[1]; the exit in BEGIN ends the run after the loop
+$awk '
+BEGIN {
+	while (getline <ARGV[1])
+		print
+	exit
+}
+' foo.txt >foo
+cmp -s foo.txt foo || echo 'BAD: T.getline (getline <arg)'
+# same as above, but reading into the variable x
+$awk '
+BEGIN {
+	while (getline x <ARGV[1])
+		print x
+	exit
+}
+' foo.txt >foo
+cmp -s foo.txt foo || echo 'BAD: T.getline (getline x <arg)'
+# command | getline: output of cat lands in $0
+$awk '
+BEGIN {
+	while (("cat " ARGV[1]) | getline)
+		print
+	exit
+}
+' foo.txt >foo
+cmp -s foo.txt foo || echo 'BAD: T.getline (cat arg | getline)'
+# command | getline var: output of cat lands in x
+$awk '
+BEGIN {
+	while (("cat " ARGV[1]) | getline x)
+		print x
+	exit
+}
+' foo.txt >foo
+cmp -s foo.txt foo || echo 'BAD: T.getline (cat arg | getline x)'
+# getline from a path that cannot be opened is expected to yield -1
+$awk ' BEGIN { print getline <"/glop/glop/glop" } ' >foo
+echo '-1' >foo1
+cmp -s foo foo1 || echo 'BAD: T.getline (non-existent file)'
+# a "0" read through cmd | getline must test false as $0 and $1, and compare equal
+echo 'false false equal' >foo1
+$awk 'BEGIN {
+	"echo 0" | getline
+	if ($0) printf "true " 
+	else printf "false "
+	if ($1) printf "true " 
+	else printf "false "
+	if ($0==$1) printf "equal\n"
+	else printf "not equal\n"
+}' >foo2
+cmp -s foo1 foo2 || echo >[1=2] 'BAD: T.getline bad $0 type in cmd|getline'
+# getline x must leave $0 alone: the record rebuilt by $1="new" is what gets printed
+echo 'L1
+L2' | $awk 'BEGIN { $0="old stuff"; $1="new"; getline x; print}' >foo1
+echo 'new stuff' >foo2
+cmp -s foo1 foo2 || echo >[1=2] 'BAD: T.getline bad update $0'
--- a/test/T.misc
+++ b/test/T.misc
@@ -68,11 +68,13 @@
 echo '#' >foo2
 diff foo1 foo2 || echo >[1=2] 'BAD: T.misc bad match of 8-bit char'
 
+sed 1000q /lib/ucd/UnicodeData.txt >foo.txt
+
 echo hello |
-$awk 'BEGIN	{ FILENAME = "/lib/ucd/UnicodeData.txt" }
+$awk 'BEGIN	{ FILENAME = "foo.txt" }
 	{ print $0 }' >/dev/null
 if(! ~ $status '')
-	echo >[1=2] 'BAD: T.misc /lib/ucd/UnicodeData.txt dropped core'
+	echo >[1=2] 'BAD: T.misc foo.txt dropped core'
 
 echo hello |
 $awk '  function foo(foo) {
@@ -161,9 +163,7 @@
 	echo >[1=2] 'BAD: T.misc unireghf dropped core'
 
 echo x | $awk '/[/]/' >[2]foo
-grep 'nonterminated character class' foo >/dev/null || error 'BAD: T.misc nonterminated fails'
-if(! ~ $status '')
-	echo >[1=2] 'BAD: T.misc nonterminated dropped core'
+grep 'nonterminated character class' foo >/dev/null || echo 'BAD: T.misc nonterminated fails'
 
 $awk '
 function f() { return 12345 }
@@ -277,8 +277,6 @@
 # The following syntax error should not dump core:
 $awk '{ n = split($1, address, !); print address[1] }' >[2]foo
 grep 'illegal statement' foo >/dev/null || echo 'BAD: T.misc split error'
-if(! ~ $status '')
-	echo >[1=2] 'BAD: T.misc split! dropped core'
 
 # The following should cause a syntax error message
 $awk 'BEGIN {"hello"}' >[2]foo
@@ -337,12 +335,12 @@
 
 echo 1 >foo1
 $awk '	{ exit }
-	END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+	END { print NR }' foo.txt >foo2
 cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit'
 
 echo 1 >foo1
 $awk '	{i = 1; while (i <= NF) {if (i == NF) exit; i++ } }
-	END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+	END { print NR }' foo.txt >foo2
 cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit 2'
 
 echo 1 >foo1
@@ -350,7 +348,7 @@
 		i = 1; while (i <= NF) {if (i == NF) return NR; i++ }
 	}
 	{ if (f() == 1) exit }
-	END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+	END { print NR }' foo.txt >foo2
 cmp -s foo1 foo2 || echo 'BAD: T.misc while return'
 
 echo 1 >foo1
@@ -359,12 +357,12 @@
 		for (i in arr) {if (i == 3) return NR; i++ }
 	}
 	{ if (f() == 1) exit }
-	END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+	END { print NR }' foo.txt >foo2
 cmp -s foo1 foo2 || echo 'BAD: T.misc while return'
 
 echo 1 >foo1
 $awk '	{i = 1; do { if (i == NF) exit; i++ } while (i <= NF) }
-	END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+	END { print NR }' foo.txt >foo2
 cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit 3'
 
 echo 1 >foo1
@@ -372,12 +370,12 @@
 		i = 1; do { if (i == NF) return NR; i++ } while (i <= NF)
 	}
 	{ if (f() == 1) exit }
-	END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+	END { print NR }' foo.txt >foo2
 cmp -s foo1 foo2 || echo 'BAD: T.misc do return'
 
 echo 1 >foo1
 $awk '	{i = 1; do { if (i == NF) break; i++ } while (i <= NF); exit }
-	END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+	END { print NR }' foo.txt >foo2
 cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit 4'
 
 echo 1 >foo1
@@ -385,7 +383,7 @@
 	  for (i in x) {
 	 	if (i == 1)
 			exit } }
-	END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+	END { print NR }' foo.txt >foo2
 cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit 5'
 
 echo XXXXXXXX >foo1
@@ -393,7 +391,7 @@
 	t = s; 	gsub("[" s "]", "X", t); print t }' >foo2
 cmp -s foo1 foo2 || echo 'BAD: T.misc weird escapes in char class'
 
-$awk '{}' /lib/ucd/UnicodeData.txt glop/glop >foo >[2]foo2
+$awk '{}' foo.txt glop/glop >foo >[2]foo2
 grep 'can''t open.*glop' foo2 >/dev/null || echo 'BAD: T.misc can''t open'
 
 echo '
--- a/test/T.overflow
+++ b/test/T.overflow
@@ -74,7 +74,7 @@
 		n++
 	print n
 }' >foo2
-cmp -s foo1 foo2 || echo 'BAD: T.overflow big array'
+cmp -s foo1 foo2 || echo 'BAD: T.overflow big array'
 
 echo x >foo1
 $awk '{print $40000000000000}' <foo1 >foo2 >[2]foo
--- a/test/T.redir
+++ b/test/T.redir
@@ -1,21 +1,23 @@
 #!/bin/rc
 echo T.redir: test redirections
 
-$awk '{ print >"foo" }' /lib/ucd/UnicodeData.txt
-diff foo /lib/ucd/UnicodeData.txt || echo 'BAD: T.redir (print >"foo")'
+sed 1000q /lib/ucd/UnicodeData.txt >foo.txt
 
+$awk '{ print >"foo" }' foo.txt
+diff foo foo.txt || echo 'BAD: T.redir (print >"foo")'
+
 rm -f foo
-$awk '{ print >>"foo" }' /lib/ucd/UnicodeData.txt
-diff foo /lib/ucd/UnicodeData.txt || echo 'BAD: T.redir (print >>"foo")'
+$awk '{ print >>"foo" }' foo.txt
+diff foo foo.txt || echo 'BAD: T.redir (print >>"foo")'
 
 rm -f foo
 $awk 'NR%2 == 1 { print >>"foo" }
-      NR%2 == 0 { print >"foo" }' /lib/ucd/UnicodeData.txt
-diff foo /lib/ucd/UnicodeData.txt || echo 'BAD: T.redir (print > and >>"foo")'
+      NR%2 == 0 { print >"foo" }' foo.txt
+diff foo foo.txt || echo 'BAD: T.redir (print > and >>"foo")'
 
 rm -f foo
-$awk '{ print | "cat >foo" }' /lib/ucd/UnicodeData.txt
-diff foo /lib/ucd/UnicodeData.txt || echo 'BAD: T.redir (print | "cat >foo")'
+$awk '{ print | "cat >foo" }' foo.txt
+diff foo foo.txt || echo 'BAD: T.redir (print | "cat >foo")'
 
 # tests flush of stdout before opening pipe
 echo '   head
@@ -26,10 +28,10 @@
 		print i | "sort" }' >foo2
 diff foo1 foo2 || echo 'BAD: T.redir (buffering)'
 
-$awk '{ print >"/fd/2" }' /lib/ucd/UnicodeData.txt >foo1 >[2]foo2
-diff foo2 /lib/ucd/UnicodeData.txt || echo 'BAD: T.redir (print >"/fd/2")'
+$awk '{ print >"/fd/2" }' foo.txt >foo1 >[2]foo2
+diff foo2 foo.txt || echo 'BAD: T.redir (print >"/fd/2")'
 diff foo1 /dev/null  || echo 'BAD: T.redir (print >"/fd/2")'
 
-$awk '{ print >"/fd/1" }' /lib/ucd/UnicodeData.txt >foo1 >[2]foo2
-diff foo1 /lib/ucd/UnicodeData.txt || echo 'BAD: T.redir (print >"/fd/1")'
+$awk '{ print >"/fd/1" }' foo.txt >foo1 >[2]foo2
+diff foo1 foo.txt || echo 'BAD: T.redir (print >"/fd/1")'
 diff foo2 /dev/null  || echo 'BAD: T.redir (print >"/fd/2")'
--- a/test/T.split
+++ b/test/T.split
@@ -51,10 +51,12 @@
 
 # getline var shouldn't impact fields.
 
+sed 1000q /lib/ucd/UnicodeData.txt >foo.txt
+
 echo 'f b a' > $TEMP0
 $awk '{
 	FS = ":"
-	getline a < "/lib/ucd/UnicodeData.txt"
+	getline a < "foo.txt"
 	print $1
 }' $TEMP0 > $TEMP1
 echo 'f' > $TEMP2
@@ -101,7 +103,7 @@
 
      ' > $TEMP0
 who | sed 10q  >> $TEMP0
-sed 10q /lib/ucd/UnicodeData.txt >> $TEMP0
+sed 10q foo.txt >> $TEMP0
 
 $awk '
 {	n = split($0, x, "")
--- /dev/null
+++ b/test/T.utf
@@ -1,0 +1,191 @@
+#!/bin/rc
+echo T.utf: tests of utf functions
+# table-driven runner: the awk program below reads the here document and runs one command per case
+$awk '
+BEGIN {
+	FS = "\t"	# test table is tab separated
+	awk = ENVIRON["awk"]	# awk under test, taken from the environment variable awk
+}
+NF == 0 || $1 ~ /^#/ {	# skip blank lines and comments between test groups
+	next
+}
+$1 ~ /try/ {	# new test
+	nt++
+	sub(/try [a-zA-Z_0-9]+ /, "")	# strip "try name ", leaving the program text
+	prog = $0
+	printf("try %3d %s\n", nt, prog)
+	prog = sprintf("%s -F''\\t'' ''%s''", awk, prog)	# quoted command line for system()
+	# print "prog is", prog
+	nt2 = 0
+	while (getline > 0) {	# one test case per line
+		if (NF == 0)	# blank line terminates a sequence
+			break
+		input = $1
+		for (i = 2; i < NF; i++)	# input data
+			input = input "\t" $i
+		test = sprintf("echo ''%s'' | %s >foo1; ",
+			input, prog)
+		if ($NF == "\"\"")	# expected output "" means empty: just create foo2
+			output = ">foo2;"
+		else
+			output = sprintf("echo ''%s'' >foo2; ", $NF)	# last field is the expected output
+		gsub(/\\t/, "\t", output)	# expand \t and \n written in the table
+		gsub(/\\n/, "\n", output)
+		run = sprintf("diff foo1 foo2 || echo test %d.%d failed",
+			nt, ++nt2)
+		# print  "input is", input
+		# print  "test is", test
+		# print  "output is", output
+		# print  "run is", run
+		system(test output run)	# actual to foo1, expected to foo2, then diff
+	}
+	tt += nt2	# running total of cases over all groups
+}
+END { print tt, "tests" }
+' <<'!!!!'
+# General format:
+# try program as rest of line
+# $1	$2	$3	output1  (\t for tab, \n for newline,
+# $1	$2	$3	output2  ("" for null)
+# ... terminated by blank line
+
+# try another program...
+
+try length { print length($1) }
+	0
+a	1
+の今がその時だ	7
+Сейчас	6
+现在是时候了	6
+给所有的好男	6
+来参加聚会。	6
+😀	1
+🖕 finger	8
+Τωρα	4
+για	3
+να	2
+עכשיו	5
+לכל	3
+לבוא	4
+の今がその時だ	7
+지금이	3
+모든	2
+파티에	3
+Сейчас	6
+для	3
+прийти	6
+
+try index { print index($1, $2) }
+abc	a	1
+abc	b	2
+abc	x	0
+现在是时候了	""	0
+现在是时候了	了	6
+现在是时候了	在是	2
+现在是时候了	x	0
+现x在是时候了	x	2
+🖕 fingerすべての善人のためにすべての善人のために	f	3
+🖕 finger🖕	r🖕	8
+
+try substr { print substr($0, 2, 3) }
+abcdef	bcd
+Τωρα ειναι η	ωρα
+Τω	ω
+지금 이절호의	금 이
+xпyрийти	пyр
+
+try rematch { print $1 ~ $2 }
+abc	a	1
+abc	x	0
+すべての善人のために	の	1
+すべての善人のために	の.*の	1
+すべての善人のために	の.*て	0
+Τωρα	ω+	1
+
+# replace first occurrence of $2 by $3 in $1
+try sub { n = sub($2, $3, $1); print n, $1 }
+abcdef	bc	XYZ	1 aXYZdef
+abcdef	xy	XYZ	0 abcdef
+の今がその時だ	の	NO	1 NO今がその時だ
+🖕 finger	🖕.*g	FING	1 FINGer
+Сейчас	.	x	1 xейчас
+
+# replace all occurrences of $2 by $3 in $1
+try gsub { n = gsub($2, $3, $1); print n, $1 }
+abcdef	bc	XYZ	1 aXYZdef
+abcdef	xy	XYZ	0 abcdef
+の今がその時だ	の	NO	2 NO今がそNO時だ
+🖕 finger	🖕.*g	FING	1 FINGer
+Сейчас	.	x	6 xxxxxx
+
+try match { print match($1, $2), RSTART, RLENGTH }
+abc	[^a]	2 2 1
+abc	[^ab]	3 3 1
+すべての善人のために	[^す]	2 2 1
+すべての善人のために	[^ぁ-ゖ]	5 5 1
+abc	a	1 1 1
+abc	x	0 0 -1
+すべての善人のために	の	4 4 1
+すべての善人のために	の.*の	4 4 4
+すべての善人のために	の.*て	0 0 -1
+Τωρα	ω+	2 2 1
+Τωρα	x+	0 0 -1
+Τωρα	ω.	2 2 2
+すべての善人のために	[の]	4 4 1
+すべての善人のために	[ぁ-え]	0 0 -1
+すべての善人のために	[^ぁ-え]	1 1 1
+Τωρα ειναι η	[α-ω]	2 2 1
+Τωρα ειναι η	[α-ω]+	2 2 3
+xxxΤωρα ειναι η	[Α-Ω]	4 4 1
+για όλους τους καλούς ά	α.*α	3 3 15
+να έρθει στο πά	[^ν]	2 2 1
+
+# FS="" should split into unicode chars
+try emptyFS BEGIN {FS=""} {print NF}
+すべての善人のために	10
+の今がその時だ	7
+Сейчас	6
+现在是时候了	6
+给所有的好男	6
+来参加聚会。	6
+😀	1
+🖕 finger	8
+
+# printf(%N.Ns) for utf8 strings
+try printfs1 {printf("[%5.2s][%-5.2s]\n"), $1, $1}
+abcd	[   ab][ab   ]
+现在abc	[   现在][现在   ]
+现ωabc	[   现ω][现ω   ]
+ωabc	[   ωa][ωa   ]
+Сейчас	[   Се][Се   ]
+Сейxyz	[   Се][Се   ]
+😀	[    😀][😀    ]
+
+# printf(%Ns) for utf8 strings: minimum width only
+try printfs2 {printf("[%5s][%-5s]\n"), $1, $1}
+abcd	[ abcd][abcd ]
+现在ab	[ 现在ab][现在ab ]
+a现在ab	[a现在ab][a现在ab]
+a现在abc	[a现在abc][a现在abc]
+现ωab	[ 现ωab][现ωab ]
+ωabc	[ ωabc][ωabc ]
+Сейчас	[Сейчас][Сейчас]
+😀	[    😀][😀    ]
+
+# printf(%.Ns) for utf8 strings: precision only
+try printfs3 {printf("[%.2s][%-.2s]\n"), $1, $1}
+abcd	[ab][ab]
+现在abc	[现在][现在]
+现ωabc	[现ω][现ω]
+ω	[ω][ω]
+😀	[😀][😀]
+
+# printf(%c) for utf
+try printfc {printf("%c %c\n", $1, substr($1,2,1))}
+すべての善人のために	す べ
+の今がその時だ	の 今
+Сейчас	С е
+现在是时候了	现 在
+😀🖕	😀 🖕
+
+!!!!
--- /dev/null
+++ b/test/chem.awk
@@ -1,0 +1,492 @@
+BEGIN {	# one-time setup: macro file, constants, default sizes, direction table dc
+	macros = "/usr/bwk/chem/chem.macros"	# CHANGE ME!!!!!
+	macros = "/dev/null" # since originals are lost; overrides the path above
+
+	pi = 3.141592654
+	deg = 57.29578	# degrees per radian; makering() divides angles by it before sin/cos
+	setparams(1.0)	# default dimensions at scale 1
+	set(dc, "up 0 right 90 down 180 left 270 ne 45 se 135 sw 225 nw 315")	# direction word -> degrees
+	set(dc, "0 n 30 ne 45 ne 60 ne 90 e 120 se 135 se 150 se 180 s")	# degrees -> compass name
+	set(dc, "300 nw 315 nw 330 nw 270 w 210 sw 225 sw 240 sw")	# degrees -> compass name, continued
+}
+function init() {	# open one chem picture and reset the drawing state
+	RING = "R"; MOL = "M"; BOND = "B"; OTHER = "O"	# manifests
+	last = OTHER
+	dir = 90
+	printf ".PS\n"
+	if (firsttime++ == 0) {	# only the first picture copies the macros and sets sizes
+		printf "copy \"%s\"\n", macros
+		printf "\ttextht = %g; textwid = .1; cwid = %g\n", textht, cwid
+		printf "\tlineht = %g; linewid = %g\n", lineht, linewid
+	}
+	printf "Last: 0,0\n"
+}
+function setparams(scale) {	# derive every drawing dimension from one scale factor
+	lineht = scale * 0.2
+	linewid = scale * 0.2
+	textht = scale * 0.16
+	db = scale * 0.2		# bond length
+	cwid = scale * 0.12		# character width
+	cr = scale * 0.08		# rad of invis circles at ring vertices
+	crh = scale * 0.16		# ht of invis ellipse at ring vertices
+	crw = scale * 0.12		# wid of invis ellipse at ring vertices
+	dav = scale * 0.015		# vertical shift up for atoms in atom macro
+	dew = scale * 0.02		# east-west shift for left of/right of
+	ringside = scale * 0.3		# side of all rings
+	dbrack = scale * 0.1		# length of bottom of bracket
+}
+
+	{ lineno++ }	# input line counter used by error()
+
+/^(\.cstart)|(begin chem)/	{ init(); inchem = 1; next }	# NOTE(review): ^ binds only to the first alternative
+/^(\.cend)|(end)/		{ inchem = 0; print ".PE"; next }	# NOTE(review): (end) matches anywhere in the line
+
+/^\./		{ print; next }		# troff
+
+inchem == 0	{ print; next }		# everything else
+
+$1 == "pic"	{ shiftfields(1); print; next }	# pic pass-thru
+$1 ~ /^#/	{ next }	# comment
+
+$1 == "textht"	{ textht = $NF; next }	# parameter settings: value is the last field
+$1 == "cwid"	{ cwid = $NF; next }
+$1 == "db"	{ db = $NF; next }
+$1 == "size"	{ if ($NF <= 4) size = $NF; else size = $NF/10	# values above 4 are divided by 10
+		  setparams(size); next }
+
+	{ print "\n#", $0 }	# debugging, etc.
+	{ lastname = "" }
+
+$1 ~ /^[A-Z].*:$/ {	# label;  falls thru after shifting left
+	lastname = substr($1, 1, length($1)-1)
+	print $1
+	shiftfields(1)
+}
+
+$1 ~ /^\"/	{ print "Last: ", $0; last = OTHER; next }	# quoted text is passed on as the new Last
+
+$1 ~ /bond/	{ bond($1); next }
+$1 ~ /^(double|triple|front|back)$/ && $2 == "bond" {	# two-word bond: glue the words into one macro name
+		   $1 = $1 $2; shiftfields(2); bond($1); next }
+
+$1 == "aromatic" { temp = $1; $1 = $2; $2 = temp }	# swap so the ring word comes first
+$1 ~ /ring|benz/ { ring($1); next }
+
+$1 == "methyl"	{ $1 = "CH3" }	# left here as an example
+
+$1 ~ /^[A-Z]/	{ molecule(); next }
+
+$1 == "left"	{ left[++stack] = fields(2, NF); printf("Last: [\n"); next }	# open a bracketed group
+
+$1 == "right"	{ bracket(); stack--; next }	# close the group and draw its brackets
+
+$1 == "label"	{ label(); next }
+
+/./	{ print "Last: ", $0; last = OTHER }	# any other non-empty line goes to pic as Last
+
+END	{ if (firsttime == 0) error("did you forget .cstart and .cend?")	# init() was never called
+	  if (inchem) printf ".PE\n"	# close a picture left open at end of input
+}
+
+function bond(type,	i, goes, from) {	# emit one bond as "Last: type(leng, dir, from)"
+	goes = ""
+	for (i = 2; i <= NF; i++)
+		if ($i == ";") {	# "bond ... ; X": X is drawn after the bond
+			goes = $(i+1)
+			NF = i - 1	# drop the ";" and everything after it
+			break
+		}
+	leng = db	# default length; leng and cf are globals, not locals
+	from = ""
+	for (cf = 2; cf <= NF; ) {	# cf is advanced by the branches and by cvtdir()/dofrom()
+		if ($cf ~ /(\+|-)?[0-9]+|up|down|right|left|ne|se|nw|sw/)
+			dir = cvtdir(dir)
+		else if ($cf ~ /^leng/) {	# "length N": explicit bond length
+			leng = $(cf+1)
+			cf += 2
+		} else if ($cf == "to") {	# "to ...": endpoint given, so no length
+			leng = 0
+			from = fields(cf, NF)
+			break
+		} else if ($cf == "from") {
+			from = dofrom()
+			break
+		} else if ($cf ~ /^#/) {	# trailing comment ends the command
+			cf = NF+1
+			break;
+		} else {	# anything else is handed to pic as is
+			from = fields(cf, NF)
+			break
+		}
+	}
+	if (from ~ /( to )|^to/)	# said "from ... to ...", so zap length
+		leng = 0
+	else if (from == "")	# no from given at all
+		from = "from Last." leave(last, dir) " " fields(cf, NF)
+	printf "Last: %s(%g, %g, %s)\n", type, leng, dir, from
+	last = BOND
+	if (lastname != "")
+		labsave(lastname, last, dir)
+	if (goes) {	# draw the item that followed ";" as a molecule
+		$0 = goes
+		molecule()
+	}
+}
+
+function dofrom(	n, s) {	# turn the word after "from" into a pic "from ..." clause
+	cf++	# skip "from"
+	n = $cf
+	if (n in labtype)	# "from Thing" => "from Thing.V.s"
+		return "from " n "." leave(labtype[n], dir)
+	if (n ~ /^\.[A-Z]/)	# "from .V" => "from Last.V.s"
+		return "from Last" n "." corner(dir)
+	if (n ~ /^[A-Z][^.]*\.[A-Z][^.]*$/)	# "from X.V" => "from X.V.s"
+		return "from " n "." corner(dir)
+	return fields(cf-1, NF)	# anything else: the remaining words, starting at "from"
+}
+
+function bracket(	t) {	# close the pic block opened by "left" and draw both brackets
+	printf("]\n")
+	if ($2 == ")")	# round bracket requested: draw with splines
+		t = "spline"
+	else
+		t = "line"
+	printf("%s from last [].sw+(%g,0) to last [].sw to last [].nw to last [].nw+(%g,0)\n",
+		t, dbrack, dbrack)	# left bracket, feet of length dbrack
+	printf("%s from last [].se-(%g,0) to last [].se to last [].ne to last [].ne-(%g,0)\n",
+		t, dbrack, dbrack)	# right bracket
+	if ($3 == "sub")	# "right X sub text": text placed at the lower right corner
+		printf("\" %s\" ljust at last [].se\n", fields(4,NF))
+}
+
+function molecule(	n, type) {	# emit the atom or group in $1 and place it relative to Last
+	n = $1
+	if (n == "BP") {	# BP: empty text of zero size, recorded as OTHER
+		$1 = "\"\" ht 0 wid 0"
+		type = OTHER
+	} else {
+		$1 = atom(n)	# replace the name by its atom(...) macro call
+		type = MOL
+	}
+	gsub(/[^A-Za-z0-9]/, "", n)	# for stuff like C(OH3): zap non-alnum
+	if ($2 == "")	# no placement words: attach to Last along dir
+		printf "Last: %s: %s with .%s at Last.%s\n", \
+			n, $0, leave(type,dir+180), leave(last,dir)
+	else if ($2 == "below")
+		printf("Last: %s: %s with .n at %s.s\n", n, $1, $3)
+	else if ($2 == "above")
+		printf("Last: %s: %s with .s at %s.n\n", n, $1, $3)
+	else if ($2 == "left" && $3 == "of")
+		printf("Last: %s: %s with .e at %s.w+(%g,0)\n", n, $1, $4, dew)
+	else if ($2 == "right" && $3 == "of")
+		printf("Last: %s: %s with .w at %s.e-(%g,0)\n", n, $1, $4, dew)
+	else	# any other placement text goes to pic unchanged
+		printf "Last: %s: %s\n", n, $0
+	last = type
+	if (lastname != "")
+		labsave(lastname, last, dir)
+	labsave(n, last, dir)	# the stripped name is also usable as a label
+}
+
+function label(	i, v) {	# number the vertices of the ring named by $2
+	if (substr(labtype[$2], 1, 1) != RING)
+		error(sprintf("%s is not a ring", $2))
+	else {
+		v = substr(labtype[$2], 2, 1)	# vertex count stored as 2nd char of the ring type
+		for (i = 1; i <= v; i++)	# number i goes between the centre C and vertex Vi
+			printf("\"\\s-3%d\\s0\" at 0.%d<%s.C,%s.V%d>\n", i, v+2, $2, $2, i)
+	}
+}
+
+function ring(type,	typeint, pt, verts, i) {	# parse a ring command and emit the ring as a pic block
+	pt = 0	# points up by default
+	if (type ~ /[1-8]$/)	# trailing digit gives the number of vertices
+		verts = substr(type, length(type), 1)
+	else if (type ~ /flat/)
+		verts = 5
+	else
+		verts = 6
+	fused = other = ""
+	for (i = 1; i <= verts; i++)	# forget atoms and multiple bonds of the previous ring
+		put[i] = dbl[i] = ""
+	nput = aromatic = withat = 0
+	for (cf = 2; cf <= NF; ) {	# cf is global; the helpers called here advance it
+		if ($cf == "pointing")
+			pt = cvtdir(0)
+		else if ($cf == "double" || $cf == "triple")
+			dblring(verts)
+		else if ($cf ~ /arom/) {
+			aromatic++
+			cf++	# handled later
+		} else if ($cf == "put") {
+			putring(verts)
+			nput++
+		} else if ($cf ~ /^#/) {	# trailing comment ends the command
+			cf = NF+1
+			break;
+		} else {	# unknown words are collected and passed to pic after the block
+			if ($cf == "with" || $cf == "at")
+				withat = 1
+			other = other " " $cf
+			cf++
+		}
+	}
+	typeint = RING verts pt		# RING | verts | dir
+	if (withat == 0)	# no explicit position: attach to the previous object
+		fused = joinring(typeint, dir, last)
+	printf "Last: [\n"
+	makering(type, pt, verts)
+	printf "] %s %s\n", fused, other
+	last = typeint
+	if (lastname != "")
+		labsave(lastname, last, dir)
+}
+
+function makering(type, pt, v,       i, a, r) {	# print the pic body of one ring: vertices, sides, circle
+	if (type ~ /flat/)	# flat rings are laid out on a hexagon first
+		v = 6
+    # vertices
+	r = ringside / (2 * sin(pi/v))	# radius that gives a regular v-gon sides of length ringside
+	printf "\tC: 0,0\n"
+	for (i = 0; i <= v+1; i++) {	# V0 through V(v+1); V1 sits at angle pt
+		a = ((i-1) / v * 360 + pt) / deg	# angle in radians
+		printf "\tV%d: (%g,%g)\n", i, r * sin(a), r * cos(a)
+	}
+	if (type ~ /flat/) {	# redefine V4 and V5, leaving five distinct corners
+		printf "\tV4: V5; V5: V6\n"
+		v = 5
+	}
+    # sides
+	if (nput > 0) {	# hetero ...
+		for (i = 1; i <= v; i++) {	# c1, c2, j, rat and fix are globals here
+			c1 = c2 = 0
+			if (put[i] != "") {	# atom at this vertex: invisible ellipse plus the atom text
+				printf("\tV%d: ellipse invis ht %g wid %g at V%d\n",
+					i, crh, crw, i)
+				printf("\t%s at V%d\n", put[i], i)
+				c1 = cr
+			}
+			j = i+1
+			if (j > v)	# last side wraps around to V1
+				j = 1
+			if (put[j] != "")
+				c2 = cr
+			printf "\tline from V%d to V%d chop %g chop %g\n", i, j, c1, c2
+			if (dbl[i] != "") {	# should check i<j
+				if (type ~ /flat/ && i == 3) {
+					rat = 0.75; fix = 5
+				} else {
+					rat = 0.85; fix = 1.5
+				}
+				if (put[i] == "")
+					c1 = 0
+				else
+					c1 = cr/fix
+				if (put[j] == "")
+					c2 = 0
+				else
+					c2 = cr/fix
+				printf "\tline from %g<C,V%d> to %g<C,V%d> chop %g chop %g\n",
+					rat, i, rat, j, c1, c2	# second line, fraction rat of the way from C
+				if (dbl[i] == "triple")
+					printf "\tline from %g<C,V%d> to %g<C,V%d> chop %g chop %g\n",
+						2-rat, i, 2-rat, j, c1, c2	# third line at fraction 2-rat
+			}
+		}
+	} else {	# regular
+		for (i = 1; i <= v; i++) {
+			j = i+1
+			if (j > v)	# last side wraps around to V1
+				j = 1
+			printf "\tline from V%d to V%d\n", i, j
+			if (dbl[i] != "") {	# should check i<j
+				if (type ~ /flat/ && i == 3) {
+					rat = 0.75
+				} else
+					rat = 0.85
+				printf "\tline from %g<C,V%d> to %g<C,V%d>\n",
+					rat, i, rat, j	# second line, fraction rat of the way from C
+				if (dbl[i] == "triple")
+					printf "\tline from %g<C,V%d> to %g<C,V%d>\n",
+						2-rat, i, 2-rat, j	# third line at fraction 2-rat
+			}
+		}
+	}
+	# punt on triple temporarily
+    # circle
+	if (type ~ /benz/ || aromatic > 0) {	# circle inside the ring, smaller for flat rings
+		if (type ~ /flat/)
+			r *= .4
+		else
+			r *= .5
+		printf "\tcircle rad %g at 0,0\n", r
+	}
+}
+
+function putring(v) {	# collect "put Mol at n"
+	cf++	# skip "put"
+	mol = $(cf++)	# mol and m are globals
+	if ($cf == "at")	# the word "at" is optional
+		cf++
+	if ($cf >= 1 && $cf <= v) {	# vertex numbers outside 1..v are ignored
+		m = mol
+		gsub(/[^A-Za-z0-9]/, "", m)	# alnum-only copy used as the pic label
+		put[$cf] = m ":" atom(mol)
+	}
+	cf++	# step past the vertex number
+}
+
+function joinring(type, dir, last) {	# join a ring to something; returns the pic "with ... at ..." clause
+	if (substr(last, 1, 1) == RING) {	# ring to ring
+		if (substr(type, 3) == substr(last, 3))	# fails if not 6-sided
+			return "with .V6 at Last.V2"
+	}
+	# if all else fails
+	return sprintf("with .%s at Last.%s", \
+		leave(type,dir+180), leave(last,dir))
+}
+
+function leave(last, d,		c, c1) {	# return vertex of last in dir d
+	if (last == BOND)	# a bond is always left from its end
+		return "end"
+	d = reduce(d)
+	if (substr(last, 1, 1) == RING)
+		return ringleave(last, d)
+	if (last == MOL) {	# atoms give "part.corner"
+		if (d == 0 || d == 180)	# straight up or down
+			c = "C"
+		else if (d > 0 && d < 180)	# directions on the right-hand side
+			c = "R"
+		else
+			c = "L"
+		if (d in dc)	# exact table entry, e.g. 30 or 60
+			c1 = dc[d]
+		else	# otherwise the nearest 45-degree corner
+			c1 = corner(d)
+		return sprintf("%s.%s", c, c1)
+	}
+	if (last == OTHER)
+		return corner(d)
+	return "c"	# unknown kind of object
+}
+
+function ringleave(last, d,	rd, verts) {	# return vertex of ring in dir d
+	verts = substr(last, 2, 1)	# ring type is RING verts pt, as built in ring()
+	rd = substr(last, 3)	# angle the ring points at
+	return sprintf("V%d.%s", int(reduce(d-rd)/(360/verts)) + 1, corner(d))
+}
+
+function corner(dir) {	# compass name from dc for dir rounded to a multiple of 45 degrees
+	return dc[reduce(45 * int((dir+22.5)/45))]
+}	
+
+function labsave(name, type, dir) {	# remember the type and direction of a named object
+	labdir[name] = dir
+	labtype[name] = type
+}
+
+function dblring(v,	d, v1, v2) {	# should canonicalize to i,i+1 mod v
+	d = $cf	# "double" or "triple", as checked by the caller ring()
+	for (cf++; $cf ~ /^[1-9]/; cf++) {	# each following word starting with a digit names one side
+		v1 = substr($cf,1,1)	# first character of the word
+		v2 = substr($cf,3,1)	# third character of the word
+		if (v2 == v1+1 || v1 == v && v2 == 1)	# e.g., 2,3 or 5,1
+			dbl[v1] = d
+		else if (v1 == v2+1 || v2 == v && v1 == 1)	# e.g., 3,2 or 1,5
+			dbl[v2] = d
+		else	# the two vertices are not neighbours
+			error(sprintf("weird %s bond in\n\t%s", d, $0))
+	}
+}
+
+function cvtdir(d) {	# maps "[pointing] somewhere" to degrees
+	if ($cf == "pointing")
+		cf++
+	if ($cf ~ /^[+\-]?[0-9]+/)	# numeric angle
+		return reduce($(cf++))
+	else if ($cf ~ /left|right|up|down|ne|nw|se|sw/)	# direction word, looked up in dc
+		return reduce(dc[$(cf++)])
+	else {	# not a direction: skip the word and keep the old value d
+		cf++
+		return d
+	}
+}
+
+function reduce(d) {	# fold angle d (degrees) into 0 <= d < 360
+	for (; d >= 360; d -= 360)
+		;	# strip whole turns
+	for (; d < 0; d += 360)
+		;	# lift negative angles
+	return d
+}
+
+function atom(s,    c, i, n, nsub, cloc, nsubc) { # convert CH3 to atom(...)
+	if (s == "\"\"")	# an empty quoted string is returned untouched
+		return s
+	n = length(s)
+	nsub = nsubc = 0
+	cloc = index(s, "C")	# position of the first C; 1 when there is none
+	if (cloc == 0)
+		cloc = 1
+	for (i = 1; i <= n; i++)
+		if (substr(s, i, 1) !~ /[A-Z]/) {	# every character that is not a capital letter
+			nsub++
+			if (i < cloc)	# ... and those of them that precede the C
+				nsubc++
+		}
+	gsub(/([0-9]+\.[0-9]+)|([0-9]+)/, "\\s-3\\d&\\u\\s+3", s)	# wrap numbers in troff size/shift escapes
+	if (s ~ /([^0-9]\.)|(\.[^0-9])/)	# centered dot
+		gsub(/\./, "\\v#-.3m#.\\v#.3m#", s)
+	return sprintf("atom(\"%s\", %g, %g, %g, %g, %g, %g)",
+		s, (n-nsub/2)*cwid, textht, (cloc-nsubc/2-0.5)*cwid, crh, crw, dav)	# counted characters weigh half a cwid
+}
+
+function in_line(	i, n, s, s1, os) {	# print $0 with formulas such as H2O subscripted; no caller is visible in this file
+	s = $0
+	os = ""
+	while ((n = match(s, /!?[A-Z][A-Za-z]*(([0-9]+\.[0-9]+)|([0-9]+))/)) > 0) {	# optional !, letters, then a number
+		os = os substr(s, 1, n-1)	# prefix
+		s1 = substr(s, n, RLENGTH)	# molecule
+		if (substr(s1, 1, 1) == "!") {	# !mol => leave alone
+			s1 = substr(s1, 2)
+		} else {
+			gsub(/([0-9]+\.[0-9]+)|([0-9]+)/, "\\s-3\\d&\\u\\s+3", s1)	# same number escapes as atom()
+			if (s1 ~ /([^0-9]\.)|(\.[^0-9])/)	# centered dot
+				gsub(/\./, "\\v#-.3m#.\\v#.3m#", s1)
+		}
+		os = os s1
+		s = substr(s, n + RLENGTH)	# tail
+	}
+	os = os s	# whatever follows the last match
+	print os
+	return
+}
+
+function shiftfields(n,		k) {	# drop $n: slide $n+1..$NF one place left, then shrink NF
+	for (k = n + 1; k <= NF; k++)
+		$(k-1) = $k
+	$NF = ""
+	NF -= 1
+}
+
+function fields(n1, n2,		k, out) {
+	# Returns $n1..$n2 joined into one string, each field followed
+	# by a blank; an empty range gives "".
+	out = ""
+	if (n1 > n2)
+		return out
+	# stop at the end of the range or at the first # comment field
+	for (k = n1; k <= n2 && $k !~ /^#/; k++)
+		out = out $k " "
+	return out
+}
+
+function set(a, s,     k, cnt, tok) {	# store the alternating key/value words of s in array a
+	cnt = split(s, tok)
+	for (k = 2; k <= cnt + 1; k += 2)
+		a[tok[k-1]] = tok[k]
+}
+
+function error(s) {	# report s with the input line number on standard error
+	printf "chem\007: error on line %d: %s\n", lineno, s > "/fd/2"	# /fd/2 as in T.redir; sh-style "cat 1>&2" is not rc syntax
+}
--