shithub: kwa

--- a/test/T.-f-f

+++ b/test/T.-f-f

@@ -8,15 +8,17 @@

 echo xxx | $awk -f foo1 -f foo2 >foo3

 diff foo foo3 || echo 'BAD: T.-f-f multiple -fs'

-echo '/a/' | $awk -f - /lib/ucd/UnicodeData.txt >foo1

-$awk '/a/' /lib/ucd/UnicodeData.txt >foo2

+sed 1000q /lib/ucd/UnicodeData.txt >foo.txt

+echo '/a/' | $awk -f - foo.txt >foo1

+$awk '/a/' foo.txt >foo2

 diff foo1 foo2 || echo 'BAD: T.-f-f  -f -'

-cp /lib/ucd/UnicodeData.txt foo1

+cp foo.txt foo1

 echo '/./ {' >foo2

 echo 'print' >foo3

 echo '}' >foo4

-$awk -f foo2 -f foo3 -f foo4 /lib/ucd/UnicodeData.txt >foo5

+$awk -f foo2 -f foo3 -f foo4 foo.txt >foo5

 diff foo1 foo5 || echo 'BAD: T.-f-f 3 files'

 echo '/./ {' >foo2

@@ -26,5 +28,5 @@

 ]' >foo4

-$awk -f foo2 -f foo3 -f foo4 /lib/ucd/UnicodeData.txt >foo5 >[2]foo6

+$awk -f foo2 -f foo3 -f foo4 foo.txt >foo5 >[2]foo6

 grep 'syntax error.*at foo4:' foo6 >/dev/null >[2=1] || echo 'BAD: T.-f-f source file name'

--- a/test/T.close

+++ b/test/T.close

@@ -1,9 +1,11 @@

 #!/bin/rc

 echo T.close: test close built-in

+sed 1000q /lib/ucd/UnicodeData.txt >foo.txt

 rm -f foo

-$awk '{ print >>"foo"; close("foo") }' /lib/ucd/UnicodeData.txt

-diff /lib/ucd/UnicodeData.txt foo || echo 'BAD: T.close (1)'

+$awk '{ print >>"foo"; close("foo") }' foo.txt

+diff foo.txt foo || echo 'BAD: T.close (1)'

 ls -l >foo

 tail -1 foo >foo1

@@ -12,7 +14,7 @@

 echo 0 >foo1

 $awk '	# non-accessible file

-  BEGIN { getline <"/lib/ucd/UnicodeData.txt"; print close("/lib/ucd/UnicodeData.txt"); }

+  BEGIN { getline <"foo.txt"; print close("foo.txt"); }

 ' >foo2

 diff foo1 foo2 || echo 'BAD: T.close (3)'

--- a/test/T.clv

+++ b/test/T.clv

@@ -134,11 +134,13 @@

 $awk -f foo0 '-vx=123' '-vy=abc' '-vz1=10.99' /dev/null 'x=4567' /dev/null >foo2

 diff foo1 foo2 || echo 'BAD: T.clv (x=15a)'

+sed 1000q /lib/ucd/UnicodeData.txt >foo.txt

 echo 'BEGIN { print x, y, z1 }

 NR==1 { print x }' >foo0

 echo '123 abc 10.99

 4567' >foo1

-$awk -v 'x=123' -v 'y=abc' -v 'z1=10.99' -f foo0 'x=4567' /lib/ucd/UnicodeData.txt >foo2

+$awk -v 'x=123' -v 'y=abc' -v 'z1=10.99' -f foo0 'x=4567' foo.txt >foo2

 diff foo1 foo2 || echo 'BAD: T.clv (x=16)'

 echo 'BEGIN { print x, y, z1 }

@@ -145,11 +147,9 @@

 NR==1 { print x }' >foo0

 echo '123 abc 10.99

 4567' >foo1

-$awk '-vx=123' '-vy=abc' '-vz1=10.99' -f foo0 'x=4567' /lib/ucd/UnicodeData.txt >foo2

+$awk '-vx=123' '-vy=abc' '-vz1=10.99' -f foo0 'x=4567' foo.txt >foo2

 diff foo1 foo2 || echo 'BAD: T.clv (x=16a)'

 # special chars in commandline assigned value;

 # have to use local echo to avoid quoting problems.

@@ -166,10 +166,6 @@

 $awk 'BEGIN { printf("a%c%c%cz\n", "\b", "\r", "\f") }' >foo1

 echo 'hello' | $awk '{print x}' 'x=a\b\r\fz' >foo2

 diff foo1 foo2 || echo 'BAD: T.clv (x=19)'

-### newer -v tests

 $awk -vx 'BEGIN {print x}' >foo >[2=1]

 grep 'invalid -v option argument: x' foo >/dev/null || echo 'BAD: T.clv (x=20)'

--- /dev/null

+++ b/test/T.getline

@@ -1,0 +1,99 @@

+#!/bin/rc
+# T.getline: exercises the forms of awk getline.  Each case writes awk output
+# to a scratch file (foo, foo1, foo2) and compares it with the expected text;
+# a mismatch prints a line starting with BAD.  $awk names the awk under test.
+echo T.getline: test getline function
+# bare getline: every stdin record goes to $0, so the output must equal the input
+who >foo1
+cat foo1 | $awk '
+BEGIN {
+	while (getline)
+		print
+	exit
+}
+' >foo
+cmp -s foo1 foo || echo 'BAD: T.getline (bare getline)'
+# getline var: same as above, but each record is read into the variable xxx
+who >foo1
+cat foo1 | $awk '
+BEGIN {
+	while (getline xxx)
+		print xxx
+	exit
+}
+' >foo
+cmp -s foo1 foo || echo 'BAD: T.getline (getline xxx)'
+# foo.txt holds the first 1000 lines of UnicodeData.txt and is the input for
+# all file-based cases below
+sed 1000q /lib/ucd/UnicodeData.txt >foo.txt
+# getline <file with a literal file name
+$awk '
+BEGIN {
+	while (getline <"foo.txt")
+		print
+	exit
+}
+' >foo
+cmp -s foo.txt foo || echo 'BAD: T.getline (getline <file)'
+# getline <"-" reads standard input
+cat foo.txt | $awk '
+BEGIN {
+	while (getline <"-")	# stdin
+		print
+	exit
+}
+' >foo
+cmp -s foo.txt foo || echo 'BAD: T.getline (getline <"-")'
+# getline <expr, with the file name taken from ARGV[1]
+$awk '
+BEGIN {
+	while (getline <ARGV[1])
+		print
+	exit
+}
+' foo.txt >foo
+cmp -s foo.txt foo || echo 'BAD: T.getline (getline <arg)'
+# getline var <expr
+$awk '
+BEGIN {
+	while (getline x <ARGV[1])
+		print x
+	exit
+}
+' foo.txt >foo
+cmp -s foo.txt foo || echo 'BAD: T.getline (getline x <arg)'
+# cmd | getline, with the command built by string concatenation
+$awk '
+BEGIN {
+	while (("cat " ARGV[1]) | getline)
+		print
+	exit
+}
+' foo.txt >foo
+cmp -s foo.txt foo || echo 'BAD: T.getline (cat arg | getline)'
+# cmd | getline var
+$awk '
+BEGIN {
+	while (("cat " ARGV[1]) | getline x)
+		print x
+	exit
+}
+' foo.txt >foo
+cmp -s foo.txt foo || echo 'BAD: T.getline (cat arg | getline x)'
+# getline from a file that cannot be opened must yield -1
+$awk ' BEGIN { print getline <"/glop/glop/glop" } ' >foo
+echo '-1' >foo1
+cmp -s foo foo1 || echo 'BAD: T.getline (non-existent file)'
+# a record "0" read through cmd | getline: $0 and $1 must both test false
+# and compare equal to each other
+echo 'false false equal' >foo1
+$awk 'BEGIN {
+	"echo 0" | getline
+	if ($0) printf "true "
+	else printf "false "
+	if ($1) printf "true "
+	else printf "false "
+	if ($0==$1) printf "equal\n"
+	else printf "not equal\n"
+}' >foo2
+cmp -s foo1 foo2 || echo >[1=2] 'BAD: T.getline bad $0 type in cmd|getline'
+# getline x must leave $0 alone: after $1 is assigned, print must still show
+# the rebuilt record "new stuff"
+echo 'L1
+L2' | $awk 'BEGIN { $0="old stuff"; $1="new"; getline x; print}' >foo1
+echo 'new stuff' >foo2
+cmp -s foo1 foo2 || echo >[1=2] 'BAD: T.getline bad update $0'

--- a/test/T.misc

+++ b/test/T.misc

@@ -68,11 +68,13 @@

 echo '#' >foo2

 diff foo1 foo2 || echo >[1=2] 'BAD: T.misc bad match of 8-bit char'

+sed 1000q /lib/ucd/UnicodeData.txt >foo.txt

 echo hello |

-$awk 'BEGIN	{ FILENAME = "/lib/ucd/UnicodeData.txt" }

+$awk 'BEGIN	{ FILENAME = "foo.txt" }

 	{ print $0 }' >/dev/null

 if(! ~ $status '')

-	echo >[1=2] 'BAD: T.misc /lib/ucd/UnicodeData.txt dropped core'

+	echo >[1=2] 'BAD: T.misc foo.txt dropped core'

 echo hello |

 $awk '  function foo(foo) {

@@ -161,9 +163,7 @@

 	echo >[1=2] 'BAD: T.misc unireghf dropped core'

 echo x | $awk '/[/]/' >[2]foo

-grep 'nonterminated character class' foo >/dev/null || error 'BAD: T.misc nonterminated fails'

-if(! ~ $status '')

-	echo >[1=2] 'BAD: T.misc nonterminated dropped core'

+grep 'nonterminated character class' foo >/dev/null || echo 'BAD: T.misc nonterminated fails'

 $awk '

 function f() { return 12345 }

@@ -277,8 +277,6 @@

 # The following syntax error should not dump core:

 $awk '{ n = split($1, address, !); print address[1] }' >[2]foo

 grep 'illegal statement' foo >/dev/null || echo 'BAD: T.misc split error'

-if(! ~ $status '')

-	echo >[1=2] 'BAD: T.misc split! dropped core'

 # The following should cause a syntax error message

 $awk 'BEGIN {"hello"}' >[2]foo

@@ -337,12 +335,12 @@

 echo 1 >foo1

 $awk '	{ exit }

-	END { print NR }' /lib/ucd/UnicodeData.txt >foo2

+	END { print NR }' foo.txt >foo2

 cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit'

 echo 1 >foo1

 $awk '	{i = 1; while (i <= NF) {if (i == NF) exit; i++ } }

-	END { print NR }' /lib/ucd/UnicodeData.txt >foo2

+	END { print NR }' foo.txt >foo2

 cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit 2'

 echo 1 >foo1

@@ -350,7 +348,7 @@

 		i = 1; while (i <= NF) {if (i == NF) return NR; i++ }

 	{ if (f() == 1) exit }

-	END { print NR }' /lib/ucd/UnicodeData.txt >foo2

+	END { print NR }' foo.txt >foo2

 cmp -s foo1 foo2 || echo 'BAD: T.misc while return'

 echo 1 >foo1

@@ -359,12 +357,12 @@

 		for (i in arr) {if (i == 3) return NR; i++ }

 	{ if (f() == 1) exit }

-	END { print NR }' /lib/ucd/UnicodeData.txt >foo2

+	END { print NR }' foo.txt >foo2

 cmp -s foo1 foo2 || echo 'BAD: T.misc while return'

 echo 1 >foo1

 $awk '	{i = 1; do { if (i == NF) exit; i++ } while (i <= NF) }

-	END { print NR }' /lib/ucd/UnicodeData.txt >foo2

+	END { print NR }' foo.txt >foo2

 cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit 3'

 echo 1 >foo1

@@ -372,12 +370,12 @@

 		i = 1; do { if (i == NF) return NR; i++ } while (i <= NF)

 	{ if (f() == 1) exit }

-	END { print NR }' /lib/ucd/UnicodeData.txt >foo2

+	END { print NR }' foo.txt >foo2

 cmp -s foo1 foo2 || echo 'BAD: T.misc do return'

 echo 1 >foo1

 $awk '	{i = 1; do { if (i == NF) break; i++ } while (i <= NF); exit }

-	END { print NR }' /lib/ucd/UnicodeData.txt >foo2

+	END { print NR }' foo.txt >foo2

 cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit 4'

 echo 1 >foo1

@@ -385,7 +383,7 @@

 	  for (i in x) {

 	 	if (i == 1)

 			exit } }

-	END { print NR }' /lib/ucd/UnicodeData.txt >foo2

+	END { print NR }' foo.txt >foo2

 cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit 5'

 echo XXXXXXXX >foo1

@@ -393,7 +391,7 @@

 	t = s; 	gsub("[" s "]", "X", t); print t }' >foo2

 cmp -s foo1 foo2 || echo 'BAD: T.misc weird escapes in char class'

-$awk '{}' /lib/ucd/UnicodeData.txt glop/glop >foo >[2]foo2

+$awk '{}' foo.txt glop/glop >foo >[2]foo2

 grep 'can''t open.*glop' foo2 >/dev/null || echo 'BAD: T.misc can''t open'

 echo '

--- a/test/T.overflow

+++ b/test/T.overflow

@@ -74,7 +74,7 @@

n++

 	print n

 }' >foo2

-cmp -s foo1 foo2 || echo 'BAD: T.overflow big array'

+cmp -s foo1 foo2 || echo 'BAD: T.overflow big array'

 echo x >foo1

 $awk '{print $40000000000000}' <foo1 >foo2 >[2]foo

--- a/test/T.redir

+++ b/test/T.redir

@@ -1,21 +1,23 @@

 #!/bin/rc

 echo T.redir: test redirections

-$awk '{ print >"foo" }' /lib/ucd/UnicodeData.txt

-diff foo /lib/ucd/UnicodeData.txt || echo 'BAD: T.redir (print >"foo")'

+sed 1000q /lib/ucd/UnicodeData.txt >foo.txt

+$awk '{ print >"foo" }' foo.txt

+diff foo foo.txt || echo 'BAD: T.redir (print >"foo")'

 rm -f foo

-$awk '{ print >>"foo" }' /lib/ucd/UnicodeData.txt

-diff foo /lib/ucd/UnicodeData.txt || echo 'BAD: T.redir (print >>"foo")'

+$awk '{ print >>"foo" }' foo.txt

+diff foo foo.txt || echo 'BAD: T.redir (print >>"foo")'

 rm -f foo

 $awk 'NR%2 == 1 { print >>"foo" }

-      NR%2 == 0 { print >"foo" }' /lib/ucd/UnicodeData.txt

-diff foo /lib/ucd/UnicodeData.txt || echo 'BAD: T.redir (print > and >>"foo")'

+      NR%2 == 0 { print >"foo" }' foo.txt

+diff foo foo.txt || echo 'BAD: T.redir (print > and >>"foo")'

 rm -f foo

-$awk '{ print | "cat >foo" }' /lib/ucd/UnicodeData.txt

-diff foo /lib/ucd/UnicodeData.txt || echo 'BAD: T.redir (print | "cat >foo")'

+$awk '{ print | "cat >foo" }' foo.txt

+diff foo foo.txt || echo 'BAD: T.redir (print | "cat >foo")'

 # tests flush of stdout before opening pipe

 echo '   head

@@ -26,10 +28,10 @@

 		print i | "sort" }' >foo2

 diff foo1 foo2 || echo 'BAD: T.redir (buffering)'

-$awk '{ print >"/fd/2" }' /lib/ucd/UnicodeData.txt >foo1 >[2]foo2

-diff foo2 /lib/ucd/UnicodeData.txt || echo 'BAD: T.redir (print >"/fd/2")'

+$awk '{ print >"/fd/2" }' foo.txt >foo1 >[2]foo2

+diff foo2 foo.txt || echo 'BAD: T.redir (print >"/fd/2")'

 diff foo1 /dev/null  || echo 'BAD: T.redir (print >"/fd/2")'

-$awk '{ print >"/fd/1" }' /lib/ucd/UnicodeData.txt >foo1 >[2]foo2

-diff foo1 /lib/ucd/UnicodeData.txt || echo 'BAD: T.redir (print >"/fd/1")'

+$awk '{ print >"/fd/1" }' foo.txt >foo1 >[2]foo2

+diff foo1 foo.txt || echo 'BAD: T.redir (print >"/fd/1")'

 diff foo2 /dev/null  || echo 'BAD: T.redir (print >"/fd/2")'

--- a/test/T.split

+++ b/test/T.split

@@ -51,10 +51,12 @@

 # getline var shouldn't impact fields.

+sed 1000q /lib/ucd/UnicodeData.txt >foo.txt

 echo 'f b a' > $TEMP0

 $awk '{

 	FS = ":"

-	getline a < "/lib/ucd/UnicodeData.txt"

+	getline a < "foo.txt"

 	print $1

 }' $TEMP0 > $TEMP1

 echo 'f' > $TEMP2

@@ -101,7 +103,7 @@

      ' > $TEMP0

 who | sed 10q  >> $TEMP0

-sed 10q /lib/ucd/UnicodeData.txt >> $TEMP0

+sed 10q foo.txt >> $TEMP0

 $awk '

 {	n = split($0, x, "")

--- /dev/null

+++ b/test/T.utf

@@ -1,0 +1,191 @@

+#!/bin/rc
+# T.utf: table-driven tests of awk string functions on UTF-8 input.
+# The awk driver below reads the here-document.  A "try NAME PROGRAM" line
+# starts a group; each following line up to the next blank line is one case:
+# tab-separated input fields, with the last field being the expected output.
+# Each case is run through system() as
+#	echo INPUT | $awk -F TAB PROGRAM >foo1; echo EXPECTED >foo2; diff foo1 foo2
+# The doubled quotes inside the driver are rc quoting for a single quote.
+# Groups MUST be separated by a blank line, otherwise the inner getline loop
+# consumes the next "try" line as test data.
+echo T.utf: tests of utf functions
+$awk '
+BEGIN {
+	FS = "\t"
+	awk = ENVIRON["awk"]	# awk under test, taken from the environment
+}
+NF == 0 || $1 ~ /^#/ {	# skip blank lines and comments between groups
+	next
+}
+$1 ~ /try/ {	# new test
+	nt++
+	sub(/try [a-zA-Z_0-9]+ /, "")	# drop "try NAME ", leaving the program text
+	prog = $0
+	printf("try %3d %s\n", nt, prog)
+	prog = sprintf("%s -F''\\t'' ''%s''", awk, prog)
+	# print "prog is", prog
+	nt2 = 0
+	while (getline > 0) {
+		if (NF == 0)	# blank line terminates a sequence
+			break
+		input = $1
+		for (i = 2; i < NF; i++)	# input data
+			input = input "\t" $i
+		test = sprintf("echo ''%s'' | %s >foo1; ",
+			input, prog)
+		if ($NF == "\"\"")	# "" in the table means empty expected output
+			output = ">foo2;"
+		else
+			output = sprintf("echo ''%s'' >foo2; ", $NF)
+		gsub(/\\t/, "\t", output)	# expand \t and \n escapes in the expected text
+		gsub(/\\n/, "\n", output)
+		run = sprintf("diff foo1 foo2 || echo test %d.%d failed",
+			nt, ++nt2)
+		# print  "input is", input
+		# print  "test is", test
+		# print  "output is", output
+		# print  "run is", run
+		system(test output run)
+	}
+	tt += nt2	# running total of cases over all groups
+}
+END { print tt, "tests" }
+' <<'!!!!'
+# General format:
+# try program as rest of line
+# $1	$2	$3	output1  (\t for tab, \n for newline,
+# $1	$2	$3	output2  ("" for null)
+# ... terminated by blank line
+# try another program...
+try length { print length($1) }
+	0
+a	1
+の今がその時だ	7
+Сейчас	6
+现在是时候了	6
+给所有的好男	6
+来参加聚会。	6
+😀	1
+🖕 finger	8
+Τωρα	4
+για	3
+να	2
+עכשיו	5
+לכל	3
+לבוא	4
+の今がその時だ	7
+지금이	3
+모든	2
+파티에	3
+Сейчас	6
+для	3
+прийти	6
+
+try index { print index($1, $2) }
+abc	a	1
+abc	b	2
+abc	x	0
+现在是时候了	""	0
+现在是时候了	了	6
+现在是时候了	在是	2
+现在是时候了	x	0
+现x在是时候了	x	2
+🖕 fingerすべての善人のためにすべての善人のために	f	3
+🖕 finger🖕	r🖕	8
+
+try substr { print substr($0, 2, 3) }
+abcdef	bcd
+Τωρα ειναι η	ωρα
+Τω	ω
+지금 이절호의	금 이
+xпyрийти	пyр
+
+try rematch { print $1 ~ $2 }
+abc	a	1
+abc	x	0
+すべての善人のために	の	1
+すべての善人のために	の.*の	1
+すべての善人のために	の.*て	0
+Τωρα	ω+	1
+
+# replace first occurrence of $2 by $3 in $1
+try sub { n = sub($2, $3, $1); print n, $1 }
+abcdef	bc	XYZ	1 aXYZdef
+abcdef	xy	XYZ	0 abcdef
+の今がその時だ	の	NO	1 NO今がその時だ
+🖕 finger	🖕.*g	FING	1 FINGer
+Сейчас	.	x	1 xейчас
+
+# replace all occurrences of $2 by $3 in $1
+try gsub { n = gsub($2, $3, $1); print n, $1 }
+abcdef	bc	XYZ	1 aXYZdef
+abcdef	xy	XYZ	0 abcdef
+の今がその時だ	の	NO	2 NO今がそNO時だ
+🖕 finger	🖕.*g	FING	1 FINGer
+Сейчас	.	x	6 xxxxxx
+
+try match { print match($1, $2), RSTART, RLENGTH }
+abc	[^a]	2 2 1
+abc	[^ab]	3 3 1
+すべての善人のために	[^す]	2 2 1
+すべての善人のために	[^ぁ-ゖ]	5 5 1
+abc	a	1 1 1
+abc	x	0 0 -1
+すべての善人のために	の	4 4 1
+すべての善人のために	の.*の	4 4 4
+すべての善人のために	の.*て	0 0 -1
+Τωρα	ω+	2 2 1
+Τωρα	x+	0 0 -1
+Τωρα	ω.	2 2 2
+すべての善人のために	[の]	4 4 1
+すべての善人のために	[ぁ-え]	0 0 -1
+すべての善人のために	[^ぁ-え]	1 1 1
+Τωρα ειναι η	[α-ω]	2 2 1
+Τωρα ειναι η	[α-ω]+	2 2 3
+xxxΤωρα ειναι η	[Α-Ω]	4 4 1
+για όλους τους καλούς ά	α.*α	3 3 15
+να έρθει στο πά	[^ν]	2 2 1
+
+# FS="" should split into unicode chars
+try emptyFS BEGIN {FS=""} {print NF}
+すべての善人のために	10
+の今がその時だ	7
+Сейчас	6
+现在是时候了	6
+给所有的好男	6
+来参加聚会。	6
+😀	1
+🖕 finger	8
+
+# printf(%N.Ns) for utf8 strings
+try printfs1 {printf("[%5.2s][%-5.2s]\n"), $1, $1}
+abcd	[   ab][ab   ]
+现在abc	[   现在][现在   ]
+现ωabc	[   现ω][现ω   ]
+ωabc	[   ωa][ωa   ]
+Сейчас	[   Се][Се   ]
+Сейxyz	[   Се][Се   ]
+😀	[    😀][😀    ]
+
+# printf(%N.Ns) for utf8 strings
+try printfs2 {printf("[%5s][%-5s]\n"), $1, $1}
+abcd	[ abcd][abcd ]
+现在ab	[ 现在ab][现在ab ]
+a现在ab	[a现在ab][a现在ab]
+a现在abc	[a现在abc][a现在abc]
+现ωab	[ 现ωab][现ωab ]
+ωabc	[ ωabc][ωabc ]
+Сейчас	[Сейчас][Сейчас]
+😀	[    😀][😀    ]
+
+# printf(%N.Ns) for utf8 strings
+try printfs3 {printf("[%.2s][%-.2s]\n"), $1, $1}
+abcd	[ab][ab]
+现在abc	[现在][现在]
+现ωabc	[现ω][现ω]
+ω	[ω][ω]
+😀	[😀][😀]
+
+# printf(%c) for utf
+try printfc {printf("%c %c\n", $1, substr($1,2,1))}
+すべての善人のために	す べ
+の今がその時だ	の 今
+Сейчас	С е
+现在是时候了	现 在
+😀🖕	😀 🖕
+!!!!

--- /dev/null

+++ b/test/chem.awk

@@ -1,0 +1,492 @@

+# Start-up: fill the direction table dc, set numeric constants and default
+# geometry.  dc maps direction words to degrees and degrees to compass names.
+BEGIN {
+	set(dc, "up 0 right 90 down 180 left 270 ne 45 se 135 sw 225 nw 315")
+	set(dc, "0 n 30 ne 45 ne 60 ne 90 e 120 se 135 se 150 se 180 s")
+	set(dc, "300 nw 315 nw 330 nw 270 w 210 sw 225 sw 240 sw")
+	deg = 57.29578		# divisor used to turn degrees into radians
+	pi = 3.141592654
+	macros = "/usr/bwk/chem/chem.macros"	# CHANGE ME!!!!!
+	macros = "/dev/null" # since originals are lost
+	setparams(1.0)		# default (unscaled) sizes
+}

+# init: begin a picture.  Prints the ".PS" opener (plus the macro copy and
+# size settings the first time only), defines the object-kind tags, and
+# resets the drawing state: last object is OTHER, direction is 90.
+function init() {
+	# manifests
+	RING = "R"
+	MOL = "M"
+	BOND = "B"
+	OTHER = "O"
+	dir = 90
+	last = OTHER
+	printf(".PS\n")
+	if (firsttime == 0) {	# one-time preamble
+		printf("copy \"%s\"\n", macros)
+		printf("\ttextht = %g; textwid = .1; cwid = %g\n", textht, cwid)
+		printf("\tlineht = %g; linewid = %g\n", lineht, linewid)
+	}
+	firsttime++
+	printf("Last: 0,0\n")
+}

+# setparams: derive every global size parameter from one scale factor.
+function setparams(scale) {
+	# lineht, linewid and the bond length db share one value
+	lineht = linewid = db = scale * 0.2
+	# text height; also ht of invis ellipse at ring vertices
+	textht = crh = scale * 0.16
+	# character width; also wid of that ellipse
+	cwid = crw = scale * 0.12
+	cr = scale * 0.08		# rad of invis circles at ring vertices
+	dav = scale * 0.015		# vertical shift up for atoms in atom macro
+	dew = scale * 0.02		# east-west shift for left of/right of
+	ringside = scale * 0.3		# side of all rings
+	dbrack = scale * 0.1		# length of bottom of bracket
+}

+# Main rules.  They run top to bottom for every input line; most end in
+# "next", so their order is significant.
+# lineno counts input lines for error() messages.
+	{ lineno++ }
+# Start/end of a chem section.
+# NOTE(review): "^" applies only to the first alternative of each regex, so
+# "begin chem" and "end" match anywhere in the line - confirm this is intended.
+/^(\.cstart)|(begin chem)/	{ init(); inchem = 1; next }
+/^(\.cend)|(end)/		{ inchem = 0; print ".PE"; next }
+/^\./		{ print; next }		# troff
+inchem == 0	{ print; next }		# everything else
+$1 == "pic"	{ shiftfields(1); print; next }	# pic pass-thru
+$1 ~ /^#/	{ next }	# comment
+# Parameter settings: the value is the last field of the line.
+$1 == "textht"	{ textht = $NF; next }
+$1 == "cwid"	{ cwid = $NF; next }
+$1 == "db"	{ db = $NF; next }
+# size: values up to 4 are used as the scale directly, larger ones are
+# divided by 10 first.
+$1 == "size"	{ if ($NF <= 4) size = $NF; else size = $NF/10
+		  setparams(size); next }
+# Everything that reaches this point is echoed into the output as a pic comment.
+	{ print "\n#", $0 }	# debugging, etc.
+	{ lastname = "" }
+$1 ~ /^[A-Z].*:$/ {	# label;  falls thru after shifting left
+	lastname = substr($1, 1, length($1)-1)
+	print $1
+	shiftfields(1)
+}
+# Quoted text is passed through as the new Last object.
+$1 ~ /^\"/	{ print "Last: ", $0; last = OTHER; next }
+$1 ~ /bond/	{ bond($1); next }
+# "double bond" etc.: glue the two words into one type name, e.g. doublebond.
+$1 ~ /^(double|triple|front|back)$/ && $2 == "bond" {
+		   $1 = $1 $2; shiftfields(2); bond($1); next }
+# "aromatic X": swap the first two fields, then fall through to the ring rule.
+$1 == "aromatic" { temp = $1; $1 = $2; $2 = temp }
+$1 ~ /ring|benz/ { ring($1); next }
+$1 == "methyl"	{ $1 = "CH3" }	# left here as an example
+$1 ~ /^[A-Z]/	{ molecule(); next }
+# Brackets: "left" saves the rest of the line and opens a pic block;
+# "right" closes it via bracket().
+$1 == "left"	{ left[++stack] = fields(2, NF); printf("Last: [\n"); next }
+$1 == "right"	{ bracket(); stack--; next }
+$1 == "label"	{ label(); next }
+# Any other non-empty line is passed to pic as the new Last object.
+/./	{ print "Last: ", $0; last = OTHER }
+# At end of input: complain if init() never ran, and close a section that
+# was left open.
+END	{ if (firsttime == 0) error("did you forget .cstart and .cend?")
+	  if (inchem) printf ".PE\n"
+}

+# bond: emit one "Last: type(leng, dir, from)" line for the current record.
+#   type            name printed as the macro to call (the caller passes $1)
+#   i, goes, from   locals
+# Uses the globals leng, dir, last, lastname and the field cursor cf; cf is
+# shared with cvtdir() and dofrom(), which advance it.
+function bond(type,	i, goes, from) {
+	goes = ""
+	# "bond ... ; Mol": keep what follows the ";" for later and cut the
+	# record off just before the ";"
+	for (i = 2; i <= NF; i++)
+		if ($i == ";") {
+			goes = $(i+1)
+			NF = i - 1
+			break
+		}
+	leng = db
+	from = ""
+	# scan the remaining fields; note the loop has no increment of its own
+	for (cf = 2; cf <= NF; ) {
+		# NOTE(review): this regex is unanchored, so any field merely
+		# containing a digit or one of these words is taken as a direction
+		if ($cf ~ /(\+|-)?[0-9]+|up|down|right|left|ne|se|nw|sw/)
+			dir = cvtdir(dir)
+		else if ($cf ~ /^leng/) {
+			leng = $(cf+1)
+			cf += 2
+		} else if ($cf == "to") {
+			leng = 0
+			from = fields(cf, NF)
+			break
+		} else if ($cf == "from") {
+			from = dofrom()
+			break
+		} else if ($cf ~ /^#/) {
+			cf = NF+1
+			break;
+		} else {
+			from = fields(cf, NF)
+			break
+		}
+	}
+	if (from ~ /( to )|^to/)	# said "from ... to ...", so zap length
+		leng = 0
+	else if (from == "")	# no from given at all
+		from = "from Last." leave(last, dir) " " fields(cf, NF)
+	printf "Last: %s(%g, %g, %s)\n", type, leng, dir, from
+	last = BOND
+	if (lastname != "")
+		labsave(lastname, last, dir)
+	# a molecule named after ";" is now processed as its own record
+	if (goes) {
+		$0 = goes
+		molecule()
+	}
+}

+# dofrom: build the "from ..." clause of a bond.  On entry cf indexes the
+# word "from"; it is moved to the field after it.  A known label, ".V" or
+# "X.V" gets a suitable corner appended; otherwise the rest of the record,
+# starting at "from", is returned as written.
+function dofrom(	n, s) {
+	n = $(++cf)	# step over "from" and take the next field
+	if (n in labtype) {
+		# "from Thing" => "from Thing.V.s"
+		return "from " n "." leave(labtype[n], dir)
+	} else if (n ~ /^\.[A-Z]/) {
+		# "from .V" => "from Last.V.s"
+		return "from Last" n "." corner(dir)
+	} else if (n ~ /^[A-Z][^.]*\.[A-Z][^.]*$/) {
+		# "from X.V" => "from X.V.s"
+		return "from " n "." corner(dir)
+	} else {
+		return fields(cf-1, NF)
+	}
+}

+# bracket: close the pic block opened by "left" and draw both bracket sides
+# around it, as splines when $2 is ")" and as lines otherwise.  With
+# "sub" as $3, the remaining fields are printed as text at the .se corner.
+function bracket(	t) {
+	t = ($2 == ")") ? "spline" : "line"
+	printf("]\n")
+	printf("%s from last [].sw+(%g,0) to last [].sw to last [].nw to last [].nw+(%g,0)\n",
+		t, dbrack, dbrack)
+	printf("%s from last [].se-(%g,0) to last [].se to last [].ne to last [].ne-(%g,0)\n",
+		t, dbrack, dbrack)
+	if ($3 != "sub")
+		return
+	printf("\" %s\" ljust at last [].se\n", fields(4,NF))
+}

+# molecule: emit the pic line for a molecule named by $1 of the current
+# record and make it the new Last object.
+#   n      the original name; reduced to letters and digits for use as a label
+#   type   OTHER for the special name "BP", MOL otherwise
+# $1 is overwritten with the pic text before printing, which also rebuilds $0.
+function molecule(	n, type) {
+	n = $1
+	if (n == "BP") {
+		$1 = "\"\" ht 0 wid 0"
+		type = OTHER
+	} else {
+		$1 = atom(n)
+		type = MOL
+	}
+	gsub(/[^A-Za-z0-9]/, "", n)	# for stuff like C(OH3): zap non-alnum
+	# placement: default attaches to Last along the current direction;
+	# below/above/left of/right of position relative to a named object
+	if ($2 == "")
+		printf "Last: %s: %s with .%s at Last.%s\n", \
+			n, $0, leave(type,dir+180), leave(last,dir)
+	else if ($2 == "below")
+		printf("Last: %s: %s with .n at %s.s\n", n, $1, $3)
+	else if ($2 == "above")
+		printf("Last: %s: %s with .s at %s.n\n", n, $1, $3)
+	else if ($2 == "left" && $3 == "of")
+		printf("Last: %s: %s with .e at %s.w+(%g,0)\n", n, $1, $4, dew)
+	else if ($2 == "right" && $3 == "of")
+		printf("Last: %s: %s with .w at %s.e-(%g,0)\n", n, $1, $4, dew)
+	else
+		printf "Last: %s: %s\n", n, $0
+	last = type
+	# record the object under the explicit label (if any) and its own name
+	if (lastname != "")
+		labsave(lastname, last, dir)
+	labsave(n, last, dir)
+}

+# label: number the vertices of the ring named by $2.  The vertex count is
+# the second character of the saved ring type.  Reports an error when $2
+# does not name a ring.
+function label(	i, v) {
+	if (substr(labtype[$2], 1, 1) != RING) {
+		error(sprintf("%s is not a ring", $2))
+		return
+	}
+	v = substr(labtype[$2], 2, 1)
+	i = 1
+	while (i <= v) {
+		printf("\"\\s-3%d\\s0\" at 0.%d<%s.C,%s.V%d>\n", i, v+2, $2, $2, i)
+		i++
+	}
+}

+# ring: parse a ring description in the current record and emit it as a
+# pic block.
+#   type      the ring word ($1 from the caller); a trailing digit 1-8 gives
+#             the vertex count, "flat" means 5, anything else 6
+#   typeint   saved type string: RING, vertex count and pointing angle
+#             concatenated
+#   pt, verts, i   locals
+# Uses the globals fused, other, put, dbl, nput, aromatic, withat and the
+# field cursor cf, which cvtdir(), dblring() and putring() advance.
+function ring(type,	typeint, pt, verts, i) {
+	pt = 0	# points up by default
+	if (type ~ /[1-8]$/)
+		verts = substr(type, length(type), 1)
+	else if (type ~ /flat/)
+		verts = 5
+	else
+		verts = 6
+	fused = other = ""
+	# clear per-vertex substituents and per-side bond marks
+	for (i = 1; i <= verts; i++)
+		put[i] = dbl[i] = ""
+	nput = aromatic = withat = 0
+	# scan the options; the loop has no increment, each branch moves cf
+	for (cf = 2; cf <= NF; ) {
+		if ($cf == "pointing")
+			pt = cvtdir(0)
+		else if ($cf == "double" || $cf == "triple")
+			dblring(verts)
+		else if ($cf ~ /arom/) {
+			aromatic++
+			cf++	# handled later
+		} else if ($cf == "put") {
+			putring(verts)
+			nput++
+		} else if ($cf ~ /^#/) {
+			cf = NF+1
+			break;
+		} else {
+			# anything else is passed through after the block;
+			# "with"/"at" means the user placed the ring explicitly
+			if ($cf == "with" || $cf == "at")
+				withat = 1
+			other = other " " $cf
+			cf++
+		}
+	}
+	typeint = RING verts pt		# RING | verts | dir
+	if (withat == 0)
+		fused = joinring(typeint, dir, last)
+	printf "Last: [\n"
+	makering(type, pt, verts)
+	printf "] %s %s\n", fused, other
+	last = typeint
+	if (lastname != "")
+		labsave(lastname, last, dir)
+}

+# makering: print the body of a ring block - centre, vertices, sides, extra
+# lines for double/triple sides, and the inner circle for aromatic rings.
+#   type   ring word; "flat" and "benz" are tested by regex
+#   pt     pointing angle in degrees
+#   v      number of vertices
+#   i, a, r   locals (index, angle in radians, radius)
+# Reads the globals put, dbl, nput, aromatic and the size parameters.
+# c1, c2, j, rat and fix are not in the parameter list, so they are globals.
+function makering(type, pt, v,       i, a, r) {
+	# a flat ring is laid out on 6 points first
+	if (type ~ /flat/)
+		v = 6
+    # vertices
+	r = ringside / (2 * sin(pi/v))
+	printf "\tC: 0,0\n"
+	# emits V0 through V(v+1), i.e. two more points than the v sides use
+	for (i = 0; i <= v+1; i++) {
+		a = ((i-1) / v * 360 + pt) / deg
+		printf "\tV%d: (%g,%g)\n", i, r * sin(a), r * cos(a)
+	}
+	# then V4 and V5 are redefined and the ring is drawn with 5 vertices
+	if (type ~ /flat/) {
+		printf "\tV4: V5; V5: V6\n"
+		v = 5
+	}
+    # sides
+	if (nput > 0) {	# hetero ...
+		for (i = 1; i <= v; i++) {
+			# c1/c2: how much to chop off each end of this side
+			c1 = c2 = 0
+			if (put[i] != "") {
+				printf("\tV%d: ellipse invis ht %g wid %g at V%d\n",
+					i, crh, crw, i)
+				printf("\t%s at V%d\n", put[i], i)
+				c1 = cr
+			}
+			# j is the next vertex, wrapping around to 1
+			j = i+1
+			if (j > v)
+				j = 1
+			if (put[j] != "")
+				c2 = cr
+			printf "\tline from V%d to V%d chop %g chop %g\n", i, j, c1, c2
+			if (dbl[i] != "") {	# should check i<j
+				# rat places the extra line between the centre C
+				# and the side; fix scales the chop amount
+				if (type ~ /flat/ && i == 3) {
+					rat = 0.75; fix = 5
+				} else {
+					rat = 0.85; fix = 1.5
+				}
+				if (put[i] == "")
+					c1 = 0
+				else
+					c1 = cr/fix
+				if (put[j] == "")
+					c2 = 0
+				else
+					c2 = cr/fix
+				printf "\tline from %g<C,V%d> to %g<C,V%d> chop %g chop %g\n",
+					rat, i, rat, j, c1, c2
+				# triple: a third line at the mirrored fraction 2-rat
+				if (dbl[i] == "triple")
+					printf "\tline from %g<C,V%d> to %g<C,V%d> chop %g chop %g\n",
+						2-rat, i, 2-rat, j, c1, c2
+			}
+		}
+	} else {	# regular
+		for (i = 1; i <= v; i++) {
+			j = i+1
+			if (j > v)
+				j = 1
+			printf "\tline from V%d to V%d\n", i, j
+			if (dbl[i] != "") {	# should check i<j
+				if (type ~ /flat/ && i == 3) {
+					rat = 0.75
+				} else
+					rat = 0.85
+				printf "\tline from %g<C,V%d> to %g<C,V%d>\n",
+					rat, i, rat, j
+				if (dbl[i] == "triple")
+					printf "\tline from %g<C,V%d> to %g<C,V%d>\n",
+						2-rat, i, 2-rat, j
+			}
+		}
+	}
+	# punt on triple temporarily
+    # circle
+	if (type ~ /benz/ || aromatic > 0) {
+		# circle radius is a fixed fraction of the ring radius
+		if (type ~ /flat/)
+			r *= .4
+		else
+			r *= .5
+		printf "\tcircle rad %g at 0,0\n", r
+	}
+}

+# putring: collect "put Mol at n".  On entry cf indexes "put".  When n is
+# within 1..v, the text to draw at vertex n is stored in put[n].  On exit
+# cf is past the vertex number.  mol and m are globals.
+function putring(v) {
+	mol = $(++cf)		# molecule name follows "put"
+	cf++
+	if ($cf == "at")	# the word "at" is optional
+		cf++
+	if ($cf >= 1 && $cf <= v) {
+		m = mol
+		gsub(/[^A-Za-z0-9]/, "", m)	# label: letters and digits only
+		put[$cf] = m ":" atom(mol)
+	}
+	cf++
+}

+# joinring: return the pic "with ... at ..." clause that attaches a new ring
+# of the given type to the previous object.
+function joinring(type, dir, last) {
+	# ring to ring with the same pointing angle: share a side
+	# (fails if not 6-sided)
+	if (substr(last, 1, 1) == RING && substr(type, 3) == substr(last, 3))
+		return "with .V6 at Last.V2"
+	# if all else fails
+	return sprintf("with .%s at Last.%s", leave(type, dir+180), leave(last, dir))
+}

+# leave: return the attachment point of an object of kind "last" when
+# leaving it in direction d (degrees).
+#   BOND         -> "end"
+#   ring         -> vertex chosen by ringleave()
+#   MOL          -> "C.", "R." or "L." followed by a compass point
+#   OTHER        -> plain compass corner
+#   anything else -> "c"
+function leave(last, d,		c, c1) {
+	if (last == BOND)
+		return "end"
+	d = reduce(d)
+	if (substr(last, 1, 1) == RING)
+		return ringleave(last, d)
+	if (last == MOL) {
+		# straight up/down uses C; 0 < d < 180 uses R; the rest L
+		c = (d == 0 || d == 180) ? "C" : ((d > 0 && d < 180) ? "R" : "L")
+		# exact table entry if there is one, else the nearest 45 degrees
+		c1 = (d in dc) ? dc[d] : corner(d)
+		return sprintf("%s.%s", c, c1)
+	}
+	return (last == OTHER) ? corner(d) : "c"
+}

+# ringleave: return "Vn.corner" for the ring vertex lying in direction d.
+# The ring type string holds the vertex count as its second character and
+# the pointing angle from the third character on.
+function ringleave(last, d,	rd, verts) {
+	return sprintf("V%d.%s", \
+		int(reduce(d - substr(last, 3)) / (360 / substr(last, 2, 1))) + 1, \
+		corner(d))
+}

+# corner: round dir to the nearest multiple of 45 degrees, reduce it to
+# 0..359 and return the dc table entry for that angle.
+function corner(dir) {
+	return dc[reduce(45 * int((dir+22.5)/45))]
+}

+# labsave: remember the kind and direction of the object called name.
+function labsave(name, type, dir) {
+	labdir[name] = dir
+	labtype[name] = type
+}

+# dblring: handle "double a,b ..." or "triple a,b ..." inside a ring
+# description.  On entry cf indexes the word double/triple; every following
+# field that starts with a digit 1-9 is consumed.
+#   v        number of ring vertices
+#   d        the word itself, stored as the mark for the side
+#   v1, v2   first and third character of the field (single-digit vertices)
+function dblring(v,	d, v1, v2) {	# should canonicalize to i,i+1 mod v
+	d = $cf
+	for (cf++; $cf ~ /^[1-9]/; cf++) {
+		v1 = substr($cf,1,1)
+		v2 = substr($cf,3,1)
+		# the side is recorded under its lower-numbered vertex,
+		# or under v for the side that wraps around to 1
+		if (v2 == v1+1 || v1 == v && v2 == 1)	# e.g., 2,3 or 5,1
+			dbl[v1] = d
+		else if (v1 == v2+1 || v2 == v && v1 == 1)	# e.g., 3,2 or 1,5
+			dbl[v2] = d
+		else
+			error(sprintf("weird %s bond in\n\t%s", d, $0))
+	}
+}

+# cvtdir: maps "[pointing] somewhere" to degrees.  Reads the field at the
+# global cursor cf and always advances cf past it.  Returns d unchanged
+# when the field is neither a number nor a direction word.
+function cvtdir(d) {
+	if ($cf == "pointing")
+		cf++
+	if ($cf ~ /^[+\-]?[0-9]+/)	# explicit angle
+		return reduce($(cf++))
+	if ($cf ~ /left|right|up|down|ne|nw|se|sw/)	# named direction
+		return reduce(dc[$(cf++)])
+	cf++
+	return d
+}

+# reduce: reduces d to 0 <= d < 360 by repeatedly adding or subtracting 360.
+function reduce(d) {
+	for (; d >= 360; d -= 360)
+		;
+	for (; d < 0; d += 360)
+		;
+	return d
+}

+# atom: convert a formula such as CH3 into an atom("...", w, ht, cloc, ...) call.
+#   s       formula text; the quoted empty string "" is returned unchanged
+#   n       length of s
+#   nsub    number of characters that are not capital letters
+#   cloc    position of the first "C" in s, or 1 if there is none
+#   nsubc   number of non-capital characters before cloc
+#   c, i    locals (c is unused)
+# Non-capital characters count as half a character width in the width and
+# offset passed to the macro.
+function atom(s,    c, i, n, nsub, cloc, nsubc) { # convert CH3 to atom(...)
+	if (s == "\"\"")
+		return s
+	n = length(s)
+	nsub = nsubc = 0
+	cloc = index(s, "C")
+	if (cloc == 0)
+		cloc = 1
+	for (i = 1; i <= n; i++)
+		if (substr(s, i, 1) !~ /[A-Z]/) {
+			nsub++
+			if (i < cloc)
+				nsubc++
+		}
+	# wrap every number in \s-3\d ... \u\s+3
+	# (presumably troff requests that set it as a subscript - confirm)
+	gsub(/([0-9]+\.[0-9]+)|([0-9]+)/, "\\s-3\\d&\\u\\s+3", s)
+	if (s ~ /([^0-9]\.)|(\.[^0-9])/)	# centered dot
+		gsub(/\./, "\\v#-.3m#.\\v#.3m#", s)
+	return sprintf("atom(\"%s\", %g, %g, %g, %g, %g, %g)",
+		s, (n-nsub/2)*cwid, textht, (cloc-nsubc/2-0.5)*cwid, crh, crw, dav)
+}

+# in_line: print $0 with every formula-like word (a capital letter, optional
+# letters, then a number) rewritten the same way atom() rewrites numbers and
+# dots.  A word prefixed with "!" is left alone apart from dropping the "!".
+#   s    unprocessed tail of the line      os   output built so far
+#   n    match position                    s1   the matched word
+#   i    unused
+# Not called anywhere in the visible part of this file.
+function in_line(	i, n, s, s1, os) {
+	s = $0
+	os = ""
+	while ((n = match(s, /!?[A-Z][A-Za-z]*(([0-9]+\.[0-9]+)|([0-9]+))/)) > 0) {
+		os = os substr(s, 1, n-1)	# prefix
+		s1 = substr(s, n, RLENGTH)	# molecule
+		if (substr(s1, 1, 1) == "!") {	# !mol => leave alone
+			s1 = substr(s1, 2)
+		} else {
+			gsub(/([0-9]+\.[0-9]+)|([0-9]+)/, "\\s-3\\d&\\u\\s+3", s1)
+			if (s1 ~ /([^0-9]\.)|(\.[^0-9])/)	# centered dot
+				gsub(/\./, "\\v#-.3m#.\\v#.3m#", s1)
+		}
+		os = os s1
+		s = substr(s, n + RLENGTH)	# tail
+	}
+	os = os s
+	print os
+	return
+}

+# shiftfields: delete field n of the current record by moving $n+1..$NF
+# down one place, blanking the last field and decrementing NF.
+function shiftfields(n,		i) {
+	i = n
+	while (i < NF) {
+		$i = $(i+1)
+		i++
+	}
+	$NF = ""
+	NF--
+}

+# fields: return fields n1..n2 of the current record, each followed by one
+# space.  Stops early at a field starting with "#".  Returns "" when
+# n1 > n2.
+function fields(n1, n2,		i, s) {
+	s = ""
+	for (i = n1; i <= n2 && $i !~ /^#/; i++)
+		s = s $i " "
+	return s
+}

+# set: load array a from the string s, read as alternating "key value" words.
+function set(a, s,     i, n, q) {
+	n = split(s, q)
+	# i indexes the value of each pair; its key is the word before it
+	for (i = 2; i - 1 <= n; i += 2)
+		a[q[i-1]] = q[i]
+}

+# error: report a problem, with the current input line number, on standard
+# error.  The message goes to "/fd/2", the same path the T.redir test uses
+# for stderr.  The earlier form piped into "cat 1>&2", which is Bourne-shell
+# redirection syntax and is not valid where the pipe command is run by rc.
+#   s   message text
+function error(s) {
+	printf "chem\007: error on line %d: %s\n", lineno, s > "/fd/2"
+}

--

⑨