ref: f01859c469dac3ed01d36a656cce03f0b63abae1
parent: 3923757ccbde3af0f253824bc9cf9a99127178fb
author: qwx <qwx@sciops.net>
date: Mon Sep 29 06:34:26 EDT 2025
test: add more tests and missing files, use only part of ucd
--- a/test/T.-f-f
+++ b/test/T.-f-f
@@ -8,15 +8,17 @@
echo xxx | $awk -f foo1 -f foo2 >foo3
diff foo foo3 || echo 'BAD: T.-f-f multiple -fs'
-echo '/a/' | $awk -f - /lib/ucd/UnicodeData.txt >foo1
-$awk '/a/' /lib/ucd/UnicodeData.txt >foo2
+sed 1000q /lib/ucd/UnicodeData.txt >foo.txt
+
+echo '/a/' | $awk -f - foo.txt >foo1
+$awk '/a/' foo.txt >foo2
diff foo1 foo2 || echo 'BAD: T.-f-f -f -'
-cp /lib/ucd/UnicodeData.txt foo1
+cp foo.txt foo1
echo '/./ {' >foo2
echo 'print' >foo3
echo '}' >foo4
-$awk -f foo2 -f foo3 -f foo4 /lib/ucd/UnicodeData.txt >foo5
+$awk -f foo2 -f foo3 -f foo4 foo.txt >foo5
diff foo1 foo5 || echo 'BAD: T.-f-f 3 files'
echo '/./ {' >foo2
@@ -26,5 +28,5 @@
]' >foo4
-$awk -f foo2 -f foo3 -f foo4 /lib/ucd/UnicodeData.txt >foo5 >[2]foo6
+$awk -f foo2 -f foo3 -f foo4 foo.txt >foo5 >[2]foo6
grep 'syntax error.*at foo4:' foo6 >/dev/null >[2=1] || echo 'BAD: T.-f-f source file name'
--- a/test/T.close
+++ b/test/T.close
@@ -1,9 +1,11 @@
#!/bin/rc
echo T.close: test close built-in
+sed 1000q /lib/ucd/UnicodeData.txt >foo.txt
+
rm -f foo
-$awk '{ print >>"foo"; close("foo") }' /lib/ucd/UnicodeData.txt
-diff /lib/ucd/UnicodeData.txt foo || echo 'BAD: T.close (1)'
+$awk '{ print >>"foo"; close("foo") }' foo.txt
+diff foo.txt foo || echo 'BAD: T.close (1)'
ls -l >foo
tail -1 foo >foo1
@@ -12,7 +14,7 @@
echo 0 >foo1
$awk ' # non-accessible file
- BEGIN { getline <"/lib/ucd/UnicodeData.txt"; print close("/lib/ucd/UnicodeData.txt"); }
+ BEGIN { getline <"foo.txt"; print close("foo.txt"); }
' >foo2
diff foo1 foo2 || echo 'BAD: T.close (3)'
--- a/test/T.clv
+++ b/test/T.clv
@@ -134,11 +134,13 @@
$awk -f foo0 '-vx=123' '-vy=abc' '-vz1=10.99' /dev/null 'x=4567' /dev/null >foo2
diff foo1 foo2 || echo 'BAD: T.clv (x=15a)'
+sed 1000q /lib/ucd/UnicodeData.txt >foo.txt
+
echo 'BEGIN { print x, y, z1 } NR==1 { print x }' >foo0
echo '123 abc 10.99
4567' >foo1
-$awk -v 'x=123' -v 'y=abc' -v 'z1=10.99' -f foo0 'x=4567' /lib/ucd/UnicodeData.txt >foo2
+$awk -v 'x=123' -v 'y=abc' -v 'z1=10.99' -f foo0 'x=4567' foo.txt >foo2
diff foo1 foo2 || echo 'BAD: T.clv (x=16)'
echo 'BEGIN { print x, y, z1 }
@@ -145,11 +147,9 @@
NR==1 { print x }' >foo0
echo '123 abc 10.99
4567' >foo1
-$awk '-vx=123' '-vy=abc' '-vz1=10.99' -f foo0 'x=4567' /lib/ucd/UnicodeData.txt >foo2
+$awk '-vx=123' '-vy=abc' '-vz1=10.99' -f foo0 'x=4567' foo.txt >foo2
diff foo1 foo2 || echo 'BAD: T.clv (x=16a)'
-
-
# special chars in commandline assigned value;
# have to use local echo to avoid quoting problems.
@@ -166,10 +166,6 @@
$awk 'BEGIN { printf("a%c%c%cz\n", "\b", "\r", "\f") }' >foo1
echo 'hello' | $awk '{print x}' 'x=a\b\r\fz' >foo2
diff foo1 foo2 || echo 'BAD: T.clv (x=19)'
-
-
-### newer -v tests
-
$awk -vx 'BEGIN {print x}' >foo >[2=1]
grep 'invalid -v option argument: x' foo >/dev/null || echo 'BAD: T.clv (x=20)'
--- /dev/null
+++ b/test/T.getline
@@ -1,0 +1,99 @@
+#!/bin/rc
+echo T.getline: test getline function
+
+who >foo1
+cat foo1 | $awk '
+BEGIN {
+ while (getline)
+ print
+ exit
+}
+' >foo
+cmp -s foo1 foo || echo 'BAD: T.getline (bare getline)'
+
+who >foo1
+cat foo1 | $awk '
+BEGIN {
+ while (getline xxx)
+ print xxx
+ exit
+}
+' >foo
+cmp -s foo1 foo || echo 'BAD: T.getline (getline xxx)'
+
+sed 1000q /lib/ucd/UnicodeData.txt >foo.txt
+
+$awk '
+BEGIN {
+ while (getline <"foo.txt")
+ print
+ exit
+}
+' >foo
+cmp -s foo.txt foo || echo 'BAD: T.getline (getline <file)'
+
+cat foo.txt | $awk '
+BEGIN {
+ while (getline <"-") # stdin
+ print
+ exit
+}
+' >foo
+cmp -s foo.txt foo || echo 'BAD: T.getline (getline <"-")'
+
+$awk '
+BEGIN {
+ while (getline <ARGV[1])
+ print
+ exit
+}
+' foo.txt >foo
+cmp -s foo.txt foo || echo 'BAD: T.getline (getline <arg)'
+
+$awk '
+BEGIN {
+ while (getline x <ARGV[1])
+ print x
+ exit
+}
+' foo.txt >foo
+cmp -s foo.txt foo || echo 'BAD: T.getline (getline x <arg)'
+
+$awk '
+BEGIN {
+ while (("cat " ARGV[1]) | getline)
+ print
+ exit
+}
+' foo.txt >foo
+cmp -s foo.txt foo || echo 'BAD: T.getline (cat arg | getline)'
+
+$awk '
+BEGIN {
+ while (("cat " ARGV[1]) | getline x)
+ print x
+ exit
+}
+' foo.txt >foo
+cmp -s foo.txt foo || echo 'BAD: T.getline (cat arg | getline x)'
+
+$awk ' BEGIN { print getline <"/glop/glop/glop" } ' >foo
+echo '-1' >foo1
+cmp -s foo foo1 || echo 'BAD: T.getline (non-existent file)'
+
+echo 'false false equal' >foo1
+$awk 'BEGIN {
+ "echo 0" | getline
+ if ($0) printf "true "
+ else printf "false "
+ if ($1) printf "true "
+ else printf "false "
+ if ($0==$1) printf "equal\n"
+ else printf "not equal\n"
+}' >foo2
+cmp -s foo1 foo2 || echo >[1=2] 'BAD: T.getline bad $0 type in cmd|getline'
+
+echo 'L1
+L2' | $awk 'BEGIN { $0="old stuff"; $1="new"; getline x; print}' >foo1
+echo 'new stuff' >foo2
+cmp -s foo1 foo2 || echo >[1=2] 'BAD: T.getline bad update $0'
--- a/test/T.misc
+++ b/test/T.misc
@@ -68,11 +68,13 @@
echo '#' >foo2
diff foo1 foo2 || echo >[1=2] 'BAD: T.misc bad match of 8-bit char'
+sed 1000q /lib/ucd/UnicodeData.txt >foo.txt
+
echo hello |
-$awk 'BEGIN { FILENAME = "/lib/ucd/UnicodeData.txt" }
+$awk 'BEGIN { FILENAME = "foo.txt" }
{ print $0 }' >/dev/null
if(! ~ $status '')
- echo >[1=2] 'BAD: T.misc /lib/ucd/UnicodeData.txt dropped core'
+ echo >[1=2] 'BAD: T.misc foo.txt dropped core'
echo hello |
$awk ' function foo(foo) {
@@ -161,9 +163,7 @@
echo >[1=2] 'BAD: T.misc unireghf dropped core'
echo x | $awk '/[/]/' >[2]foo
-grep 'nonterminated character class' foo >/dev/null || error 'BAD: T.misc nonterminated fails'
-if(! ~ $status '')
- echo >[1=2] 'BAD: T.misc nonterminated dropped core'
+grep 'nonterminated character class' foo >/dev/null || echo 'BAD: T.misc nonterminated fails'
$awk '
function f() { return 12345 }
@@ -277,8 +277,6 @@
# The following syntax error should not dump core:
$awk '{ n = split($1, address, !); print address[1] }' >[2]foo
grep 'illegal statement' foo >/dev/null || echo 'BAD: T.misc split error'
-if(! ~ $status '')
- echo >[1=2] 'BAD: T.misc split! dropped core'
# The following should cause a syntax error message
$awk 'BEGIN {"hello"}' >[2]foo
@@ -337,12 +335,12 @@
echo 1 >foo1
$awk ' { exit }
- END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+ END { print NR }' foo.txt >foo2
cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit'
echo 1 >foo1
$awk ' {i = 1; while (i <= NF) {if (i == NF) exit; i++ } }
- END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+ END { print NR }' foo.txt >foo2
cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit 2'
echo 1 >foo1
@@ -350,7 +348,7 @@
i = 1; while (i <= NF) {if (i == NF) return NR; i++ }}
{ if (f() == 1) exit }
- END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+ END { print NR }' foo.txt >foo2
cmp -s foo1 foo2 || echo 'BAD: T.misc while return'
echo 1 >foo1
@@ -359,12 +357,12 @@
for (i in arr) {if (i == 3) return NR; i++ }}
{ if (f() == 1) exit }
- END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+ END { print NR }' foo.txt >foo2
cmp -s foo1 foo2 || echo 'BAD: T.misc while return'
echo 1 >foo1
$awk ' {i = 1; do { if (i == NF) exit; i++ } while (i <= NF) }
- END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+ END { print NR }' foo.txt >foo2
cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit 3'
echo 1 >foo1
@@ -372,12 +370,12 @@
i = 1; do { if (i == NF) return NR; i++ } while (i <= NF)}
{ if (f() == 1) exit }
- END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+ END { print NR }' foo.txt >foo2
cmp -s foo1 foo2 || echo 'BAD: T.misc do return'
echo 1 >foo1
$awk ' {i = 1; do { if (i == NF) break; i++ } while (i <= NF); exit }
- END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+ END { print NR }' foo.txt >foo2
cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit 4'
echo 1 >foo1
@@ -385,7 +383,7 @@
for (i in x) {if (i == 1)
exit } }
- END { print NR }' /lib/ucd/UnicodeData.txt >foo2
+ END { print NR }' foo.txt >foo2
cmp -s foo1 foo2 || echo 'BAD: T.misc immmediate exit 5'
echo XXXXXXXX >foo1
@@ -393,7 +391,7 @@
t = s; gsub("[" s "]", "X", t); print t }' >foo2
cmp -s foo1 foo2 || echo 'BAD: T.misc weird escapes in char class'
-$awk '{}' /lib/ucd/UnicodeData.txt glop/glop >foo >[2]foo2
+$awk '{}' foo.txt glop/glop >foo >[2]foo2
grep 'can''t open.*glop' foo2 >/dev/null || echo 'BAD: T.misc can''t open'
echo '
--- a/test/T.overflow
+++ b/test/T.overflow
@@ -74,7 +74,7 @@
n++
print n
}' >foo2
-cmp -s foo1 foo2 || echo 'BAD: T.overflow big array'
+cmp -s foo1 foo2 || echo 'BAD: T.overflow big array'
echo x >foo1
$awk '{print $40000000000000}' <foo1 >foo2 >[2]foo
--- a/test/T.redir
+++ b/test/T.redir
@@ -1,21 +1,23 @@
#!/bin/rc
echo T.redir: test redirections
-$awk '{ print >"foo" }' /lib/ucd/UnicodeData.txt
-diff foo /lib/ucd/UnicodeData.txt || echo 'BAD: T.redir (print >"foo")'
+sed 1000q /lib/ucd/UnicodeData.txt >foo.txt
+$awk '{ print >"foo" }' foo.txt
+diff foo foo.txt || echo 'BAD: T.redir (print >"foo")'
+
rm -f foo
-$awk '{ print >>"foo" }' /lib/ucd/UnicodeData.txt
-diff foo /lib/ucd/UnicodeData.txt || echo 'BAD: T.redir (print >>"foo")'
+$awk '{ print >>"foo" }' foo.txt
+diff foo foo.txt || echo 'BAD: T.redir (print >>"foo")'
rm -f foo
$awk 'NR%2 == 1 { print >>"foo" }
- NR%2 == 0 { print >"foo" }' /lib/ucd/UnicodeData.txt
-diff foo /lib/ucd/UnicodeData.txt || echo 'BAD: T.redir (print > and >>"foo")'
+ NR%2 == 0 { print >"foo" }' foo.txt
+diff foo foo.txt || echo 'BAD: T.redir (print > and >>"foo")'
rm -f foo
-$awk '{ print | "cat >foo" }' /lib/ucd/UnicodeData.txt
-diff foo /lib/ucd/UnicodeData.txt || echo 'BAD: T.redir (print | "cat >foo")'
+$awk '{ print | "cat >foo" }' foo.txt
+diff foo foo.txt || echo 'BAD: T.redir (print | "cat >foo")'
# tests flush of stdout before opening pipe
echo ' head
@@ -26,10 +28,10 @@
print i | "sort" }' >foo2
diff foo1 foo2 || echo 'BAD: T.redir (buffering)'
-$awk '{ print >"/fd/2" }' /lib/ucd/UnicodeData.txt >foo1 >[2]foo2
-diff foo2 /lib/ucd/UnicodeData.txt || echo 'BAD: T.redir (print >"/fd/2")'
+$awk '{ print >"/fd/2" }' foo.txt >foo1 >[2]foo2
+diff foo2 foo.txt || echo 'BAD: T.redir (print >"/fd/2")'
diff foo1 /dev/null || echo 'BAD: T.redir (print >"/fd/2")'
-$awk '{ print >"/fd/1" }' /lib/ucd/UnicodeData.txt >foo1 >[2]foo2
-diff foo1 /lib/ucd/UnicodeData.txt || echo 'BAD: T.redir (print >"/fd/1")'
+$awk '{ print >"/fd/1" }' foo.txt >foo1 >[2]foo2
+diff foo1 foo.txt || echo 'BAD: T.redir (print >"/fd/1")'
diff foo2 /dev/null || echo 'BAD: T.redir (print >"/fd/2")'
--- a/test/T.split
+++ b/test/T.split
@@ -51,10 +51,12 @@
# getline var shouldn't impact fields.
+sed 1000q /lib/ucd/UnicodeData.txt >foo.txt
+
echo 'f b a' > $TEMP0
$awk '{FS = ":"
- getline a < "/lib/ucd/UnicodeData.txt"
+ getline a < "foo.txt"
print $1
}' $TEMP0 > $TEMP1
echo 'f' > $TEMP2
@@ -101,7 +103,7 @@
' > $TEMP0
who | sed 10q >> $TEMP0
-sed 10q /lib/ucd/UnicodeData.txt >> $TEMP0
+sed 10q foo.txt >> $TEMP0
$awk '
{ n = split($0, x, "")
--- /dev/null
+++ b/test/T.utf
@@ -1,0 +1,191 @@
+#!/bin/rc
+echo T.utf: tests of utf functions
+
+$awk '
+BEGIN {
+ FS = "\t"
+ awk = ENVIRON["awk"]
+}
+NF == 0 || $1 ~ /^#/ {
+ next
+}
+$1 ~ /try/ { # new test
+ nt++
+ sub(/try [a-zA-Z_0-9]+ /, "")
+ prog = $0
+ printf("try %3d %s\n", nt, prog)
+ prog = sprintf("%s -F''\\t'' ''%s''", awk, prog)
+ # print "prog is", prog
+ nt2 = 0
+ while (getline > 0) {
+ if (NF == 0) # blank line terminates a sequence
+ break
+ input = $1
+ for (i = 2; i < NF; i++) # input data
+ input = input "\t" $i
+ test = sprintf("echo ''%s'' | %s >foo1; ",
+ input, prog)
+ if ($NF == "\"\"")
+ output = ">foo2;"
+ else
+ output = sprintf("echo ''%s'' >foo2; ", $NF)
+ gsub(/\\t/, "\t", output)
+ gsub(/\\n/, "\n", output)
+ run = sprintf("diff foo1 foo2 || echo test %d.%d failed",
+ nt, ++nt2)
+ # print "input is", input
+ # print "test is", test
+ # print "output is", output
+ # print "run is", run
+ system(test output run)
+ }
+ tt += nt2
+}
+END { print tt, "tests" }
+' <<'!!!!'
+# General format:
+# try program as rest of line
+# $1 $2 $3 output1 (\t for tab, \n for newline,
+# $1 $2 $3 output2 ("" for null)
+# ... terminated by blank line
+
+# try another program...
+
+try length { print length($1) }
+ 0
+a 1
+の今がその時だ 7
+Сейчас 6
+现在是时候了 6
+给所有的好男 6
+来参加聚会。 6
+😀 1
+🖕 finger 8
+Τωρα 4
+για 3
+να 2
+עכשיו 5
+לכל 3
+לבוא 4
+の今がその時だ 7
+지금이 3
+모든 2
+파티에 3
+Сейчас 6
+для 3
+прийти 6
+
+try index { print index($1, $2) }
+abc a 1
+abc b 2
+abc x 0
+现在是时候了 "" 0
+现在是时候了 了 6
+现在是时候了 在是 2
+现在是时候了 x 0
+现x在是时候了 x 2
+🖕 fingerすべての善人のためにすべての善人のために f 3
+🖕 finger🖕 r🖕 8
+
+try substr { print substr($0, 2, 3) }
+abcdef bcd
+Τωρα ειναι η ωρα
+Τω ω
+지금 이절호의 금 이
+xпyрийти пyр
+
+try rematch { print $1 ~ $2 }
+abc a 1
+abc x 0
+すべての善人のために の 1
+すべての善人のために の.*の 1
+すべての善人のために の.*て 0
+Τωρα ω+ 1
+
+# replace first occurrence of $2 by $3 in $1
+try sub { n = sub($2, $3, $1); print n, $1 }
+abcdef bc XYZ 1 aXYZdef
+abcdef xy XYZ 0 abcdef
+の今がその時だ の NO 1 NO今がその時だ
+🖕 finger 🖕.*g FING 1 FINGer
+Сейчас . x 1 xейчас
+
+# replace all occurrences of $2 by $3 in $1
+try gsub { n = gsub($2, $3, $1); print n, $1 }
+abcdef bc XYZ 1 aXYZdef
+abcdef xy XYZ 0 abcdef
+の今がその時だ の NO 2 NO今がそNO時だ
+🖕 finger 🖕.*g FING 1 FINGer
+Сейчас . x 6 xxxxxx
+
+try match { print match($1, $2), RSTART, RLENGTH }
+abc [^a] 2 2 1
+abc [^ab] 3 3 1
+すべての善人のために [^す] 2 2 1
+すべての善人のために [^ぁ-ゖ] 5 5 1
+abc a 1 1 1
+abc x 0 0 -1
+すべての善人のために の 4 4 1
+すべての善人のために の.*の 4 4 4
+すべての善人のために の.*て 0 0 -1
+Τωρα ω+ 2 2 1
+Τωρα x+ 0 0 -1
+Τωρα ω. 2 2 2
+すべての善人のために [の] 4 4 1
+すべての善人のために [ぁ-え] 0 0 -1
+すべての善人のために [^ぁ-え] 1 1 1
+Τωρα ειναι η [α-ω] 2 2 1
+Τωρα ειναι η [α-ω]+ 2 2 3
+xxxΤωρα ειναι η [Α-Ω] 4 4 1
+για όλους τους καλούς ά α.*α 3 3 15
+να έρθει στο πά [^ν] 2 2 1
+
+# FS="" should split into unicode chars
+try emptyFS BEGIN {FS=""} {print NF}
+すべての善人のために 10
+の今がその時だ 7
+Сейчас 6
+现在是时候了 6
+给所有的好男 6
+来参加聚会。 6
+😀 1
+🖕 finger 8
+
+# printf(%N.Ns) for utf8 strings
+try printfs1 {printf("[%5.2s][%-5.2s]\n"), $1, $1}
+abcd [ ab][ab ]
+现在abc [ 现在][现在 ]
+现ωabc [ 现ω][现ω ]
+ωabc [ ωa][ωa ]
+Сейчас [ Се][Се ]
+Сейxyz [ Се][Се ]
+😀 [ 😀][😀 ]
+
+# printf(%N.Ns) for utf8 strings
+try printfs2 {printf("[%5s][%-5s]\n"), $1, $1}
+abcd [ abcd][abcd ]
+现在ab [ 现在ab][现在ab ]
+a现在ab [a现在ab][a现在ab]
+a现在abc [a现在abc][a现在abc]
+现ωab [ 现ωab][现ωab ]
+ωabc [ ωabc][ωabc ]
+Сейчас [Сейчас][Сейчас]
+😀 [ 😀][😀 ]
+
+# printf(%N.Ns) for utf8 strings
+try printfs3 {printf("[%.2s][%-.2s]\n"), $1, $1}
+abcd [ab][ab]
+现在abc [现在][现在]
+现ωabc [现ω][现ω]
+ω [ω][ω]
+😀 [😀][😀]
+
+# printf(%c) for utf
+try printfc {printf("%c %c\n", $1, substr($1,2,1))}
+すべての善人のために す べ
+の今がその時だ の 今
+Сейчас С е
+现在是时候了 现 在
+😀🖕 😀 🖕
+
+!!!!
--- /dev/null
+++ b/test/chem.awk
@@ -1,0 +1,492 @@
+BEGIN {
+ macros = "/usr/bwk/chem/chem.macros" # CHANGE ME!!!!!
+ macros = "/dev/null" # since originals are lost
+
+ pi = 3.141592654
+ deg = 57.29578
+ setparams(1.0)
+ set(dc, "up 0 right 90 down 180 left 270 ne 45 se 135 sw 225 nw 315")
+ set(dc, "0 n 30 ne 45 ne 60 ne 90 e 120 se 135 se 150 se 180 s")
+ set(dc, "300 nw 315 nw 330 nw 270 w 210 sw 225 sw 240 sw")
+}
+function init() {
+ printf ".PS\n"
+ if (firsttime++ == 0) {
+ printf "copy \"%s\"\n", macros
+ printf "\ttextht = %g; textwid = .1; cwid = %g\n", textht, cwid
+ printf "\tlineht = %g; linewid = %g\n", lineht, linewid
+ }
+ printf "Last: 0,0\n"
+ RING = "R"; MOL = "M"; BOND = "B"; OTHER = "O" # manifests
+ last = OTHER
+ dir = 90
+}
+function setparams(scale) {
+ lineht = scale * 0.2
+ linewid = scale * 0.2
+ textht = scale * 0.16
+ db = scale * 0.2 # bond length
+ cwid = scale * 0.12 # character width
+ cr = scale * 0.08 # rad of invis circles at ring vertices
+ crh = scale * 0.16 # ht of invis ellipse at ring vertices
+ crw = scale * 0.12 # wid
+ dav = scale * 0.015 # vertical shift up for atoms in atom macro
+ dew = scale * 0.02 # east-west shift for left of/right of
+ ringside = scale * 0.3 # side of all rings
+ dbrack = scale * 0.1 # length of bottom of bracket
+}
+
+ { lineno++ }
+
+/^(\.cstart)|(begin chem)/ { init(); inchem = 1; next }
+/^(\.cend)|(end)/ { inchem = 0; print ".PE"; next }
+
+/^\./ { print; next } # troff
+
+inchem == 0 { print; next } # everything else
+
+$1 == "pic" { shiftfields(1); print; next } # pic pass-thru
+$1 ~ /^#/ { next } # comment
+
+$1 == "textht" { textht = $NF; next }
+$1 == "cwid" { cwid = $NF; next }
+$1 == "db" { db = $NF; next }
+$1 == "size" { if ($NF <= 4) size = $NF; else size = $NF/10
+ setparams(size); next }
+
+ { print "\n#", $0 } # debugging, etc.
+ { lastname = "" }
+
+$1 ~ /^[A-Z].*:$/ { # label; falls thru after shifting left
+ lastname = substr($1, 1, length($1)-1)
+ print $1
+ shiftfields(1)
+}
+
+$1 ~ /^\"/ { print "Last: ", $0; last = OTHER; next }
+
+$1 ~ /bond/ { bond($1); next }
+$1 ~ /^(double|triple|front|back)$/ && $2 == "bond" {
+ $1 = $1 $2; shiftfields(2); bond($1); next }
+
+$1 == "aromatic" { temp = $1; $1 = $2; $2 = temp }
+$1 ~ /ring|benz/ { ring($1); next }
+
+$1 == "methyl" { $1 = "CH3" } # left here as an example
+
+$1 ~ /^[A-Z]/ { molecule(); next }
+
+$1 == "left" { left[++stack] = fields(2, NF); printf("Last: [\n"); next }
+
+$1 == "right" { bracket(); stack--; next }
+
+$1 == "label" { label(); next }
+
+/./ { print "Last: ", $0; last = OTHER }
+
+END { if (firsttime == 0) error("did you forget .cstart and .cend?")
+ if (inchem) printf ".PE\n"
+}
+
+function bond(type, i, goes, from) {
+ goes = ""
+ for (i = 2; i <= NF; i++)
+ if ($i == ";") {
+ goes = $(i+1)
+ NF = i - 1
+ break
+ }
+ leng = db
+ from = ""
+ for (cf = 2; cf <= NF; ) {
+ if ($cf ~ /(\+|-)?[0-9]+|up|down|right|left|ne|se|nw|sw/)
+ dir = cvtdir(dir)
+ else if ($cf ~ /^leng/) {
+ leng = $(cf+1)
+ cf += 2
+ } else if ($cf == "to") {
+ leng = 0
+ from = fields(cf, NF)
+ break
+ } else if ($cf == "from") {
+ from = dofrom()
+ break
+ } else if ($cf ~ /^#/) {
+ cf = NF+1
+ break;
+ } else {
+ from = fields(cf, NF)
+ break
+ }
+ }
+ if (from ~ /( to )|^to/) # said "from ... to ...", so zap length
+ leng = 0
+ else if (from == "") # no from given at all
+ from = "from Last." leave(last, dir) " " fields(cf, NF)
+ printf "Last: %s(%g, %g, %s)\n", type, leng, dir, from
+ last = BOND
+ if (lastname != "")
+ labsave(lastname, last, dir)
+ if (goes) {
+ $0 = goes
+ molecule()
+ }
+}
+
+function dofrom( n, s) {
+ cf++ # skip "from"
+ n = $cf
+ if (n in labtype) # "from Thing" => "from Thing.V.s"
+ return "from " n "." leave(labtype[n], dir)
+ if (n ~ /^\.[A-Z]/) # "from .V" => "from Last.V.s"
+ return "from Last" n "." corner(dir)
+ if (n ~ /^[A-Z][^.]*\.[A-Z][^.]*$/) # "from X.V" => "from X.V.s"
+ return "from " n "." corner(dir)
+ return fields(cf-1, NF)
+}
+
+function bracket( t) {
+ printf("]\n")
+ if ($2 == ")")
+ t = "spline"
+ else
+ t = "line"
+ printf("%s from last [].sw+(%g,0) to last [].sw to last [].nw to last [].nw+(%g,0)\n",
+ t, dbrack, dbrack)
+ printf("%s from last [].se-(%g,0) to last [].se to last [].ne to last [].ne-(%g,0)\n",
+ t, dbrack, dbrack)
+ if ($3 == "sub")
+ printf("\" %s\" ljust at last [].se\n", fields(4,NF))
+}
+
+function molecule( n, type) {
+ n = $1
+ if (n == "BP") {
+ $1 = "\"\" ht 0 wid 0"
+ type = OTHER
+ } else {
+ $1 = atom(n)
+ type = MOL
+ }
+ gsub(/[^A-Za-z0-9]/, "", n) # for stuff like C(OH3): zap non-alnum
+ if ($2 == "")
+ printf "Last: %s: %s with .%s at Last.%s\n", \
+ n, $0, leave(type,dir+180), leave(last,dir)
+ else if ($2 == "below")
+ printf("Last: %s: %s with .n at %s.s\n", n, $1, $3)
+ else if ($2 == "above")
+ printf("Last: %s: %s with .s at %s.n\n", n, $1, $3)
+ else if ($2 == "left" && $3 == "of")
+ printf("Last: %s: %s with .e at %s.w+(%g,0)\n", n, $1, $4, dew)
+ else if ($2 == "right" && $3 == "of")
+ printf("Last: %s: %s with .w at %s.e-(%g,0)\n", n, $1, $4, dew)
+ else
+ printf "Last: %s: %s\n", n, $0
+ last = type
+ if (lastname != "")
+ labsave(lastname, last, dir)
+ labsave(n, last, dir)
+}
+
+function label( i, v) {
+ if (substr(labtype[$2], 1, 1) != RING)
+ error(sprintf("%s is not a ring", $2))
+ else {
+ v = substr(labtype[$2], 2, 1)
+ for (i = 1; i <= v; i++)
+ printf("\"\\s-3%d\\s0\" at 0.%d<%s.C,%s.V%d>\n", i, v+2, $2, $2, i)
+ }
+}
+
+function ring(type, typeint, pt, verts, i) {
+ pt = 0 # points up by default
+ if (type ~ /[1-8]$/)
+ verts = substr(type, length(type), 1)
+ else if (type ~ /flat/)
+ verts = 5
+ else
+ verts = 6
+ fused = other = ""
+ for (i = 1; i <= verts; i++)
+ put[i] = dbl[i] = ""
+ nput = aromatic = withat = 0
+ for (cf = 2; cf <= NF; ) {
+ if ($cf == "pointing")
+ pt = cvtdir(0)
+ else if ($cf == "double" || $cf == "triple")
+ dblring(verts)
+ else if ($cf ~ /arom/) {
+ aromatic++
+ cf++ # handled later
+ } else if ($cf == "put") {
+ putring(verts)
+ nput++
+ } else if ($cf ~ /^#/) {
+ cf = NF+1
+ break;
+ } else {
+ if ($cf == "with" || $cf == "at")
+ withat = 1
+ other = other " " $cf
+ cf++
+ }
+ }
+ typeint = RING verts pt # RING | verts | dir
+ if (withat == 0)
+ fused = joinring(typeint, dir, last)
+ printf "Last: [\n"
+ makering(type, pt, verts)
+ printf "] %s %s\n", fused, other
+ last = typeint
+ if (lastname != "")
+ labsave(lastname, last, dir)
+}
+
+function makering(type, pt, v, i, a, r) {
+ if (type ~ /flat/)
+ v = 6
+ # vertices
+ r = ringside / (2 * sin(pi/v))
+ printf "\tC: 0,0\n"
+ for (i = 0; i <= v+1; i++) {
+ a = ((i-1) / v * 360 + pt) / deg
+ printf "\tV%d: (%g,%g)\n", i, r * sin(a), r * cos(a)
+ }
+ if (type ~ /flat/) {
+ printf "\tV4: V5; V5: V6\n"
+ v = 5
+ }
+ # sides
+ if (nput > 0) { # hetero ...
+ for (i = 1; i <= v; i++) {
+ c1 = c2 = 0
+ if (put[i] != "") {
+ printf("\tV%d: ellipse invis ht %g wid %g at V%d\n",
+ i, crh, crw, i)
+ printf("\t%s at V%d\n", put[i], i)
+ c1 = cr
+ }
+ j = i+1
+ if (j > v)
+ j = 1
+ if (put[j] != "")
+ c2 = cr
+ printf "\tline from V%d to V%d chop %g chop %g\n", i, j, c1, c2
+ if (dbl[i] != "") { # should check i<j
+ if (type ~ /flat/ && i == 3) {
+ rat = 0.75; fix = 5
+ } else {
+ rat = 0.85; fix = 1.5
+ }
+ if (put[i] == "")
+ c1 = 0
+ else
+ c1 = cr/fix
+ if (put[j] == "")
+ c2 = 0
+ else
+ c2 = cr/fix
+ printf "\tline from %g<C,V%d> to %g<C,V%d> chop %g chop %g\n",
+ rat, i, rat, j, c1, c2
+ if (dbl[i] == "triple")
+ printf "\tline from %g<C,V%d> to %g<C,V%d> chop %g chop %g\n",
+ 2-rat, i, 2-rat, j, c1, c2
+ }
+ }
+ } else { # regular
+ for (i = 1; i <= v; i++) {
+ j = i+1
+ if (j > v)
+ j = 1
+ printf "\tline from V%d to V%d\n", i, j
+ if (dbl[i] != "") { # should check i<j
+ if (type ~ /flat/ && i == 3) {
+ rat = 0.75
+ } else
+ rat = 0.85
+ printf "\tline from %g<C,V%d> to %g<C,V%d>\n",
+ rat, i, rat, j
+ if (dbl[i] == "triple")
+ printf "\tline from %g<C,V%d> to %g<C,V%d>\n",
+ 2-rat, i, 2-rat, j
+ }
+ }
+ }
+ # punt on triple temporarily
+ # circle
+ if (type ~ /benz/ || aromatic > 0) {
+ if (type ~ /flat/)
+ r *= .4
+ else
+ r *= .5
+ printf "\tcircle rad %g at 0,0\n", r
+ }
+}
+
+function putring(v) { # collect "put Mol at n"
+ cf++
+ mol = $(cf++)
+ if ($cf == "at")
+ cf++
+ if ($cf >= 1 && $cf <= v) {
+ m = mol
+ gsub(/[^A-Za-z0-9]/, "", m)
+ put[$cf] = m ":" atom(mol)
+ }
+ cf++
+}
+
+function joinring(type, dir, last) { # join a ring to something
+ if (substr(last, 1, 1) == RING) { # ring to ring
+ if (substr(type, 3) == substr(last, 3)) # fails if not 6-sided
+ return "with .V6 at Last.V2"
+ }
+ # if all else fails
+ return sprintf("with .%s at Last.%s", \
+ leave(type,dir+180), leave(last,dir))
+}
+
+function leave(last, d, c, c1) { # return vertex of last in dir d
+ if (last == BOND)
+ return "end"
+ d = reduce(d)
+ if (substr(last, 1, 1) == RING)
+ return ringleave(last, d)
+ if (last == MOL) {
+ if (d == 0 || d == 180)
+ c = "C"
+ else if (d > 0 && d < 180)
+ c = "R"
+ else
+ c = "L"
+ if (d in dc)
+ c1 = dc[d]
+ else
+ c1 = corner(d)
+ return sprintf("%s.%s", c, c1)
+ }
+ if (last == OTHER)
+ return corner(d)
+ return "c"
+}
+
+function ringleave(last, d, rd, verts) { # return vertex of ring in dir d
+ verts = substr(last, 2, 1)
+ rd = substr(last, 3)
+ return sprintf("V%d.%s", int(reduce(d-rd)/(360/verts)) + 1, corner(d))
+}
+
+function corner(dir) {
+ return dc[reduce(45 * int((dir+22.5)/45))]
+}
+
+function labsave(name, type, dir) {
+ labtype[name] = type
+ labdir[name] = dir
+}
+
+function dblring(v, d, v1, v2) { # should canonicalize to i,i+1 mod v
+ d = $cf
+ for (cf++; $cf ~ /^[1-9]/; cf++) {
+ v1 = substr($cf,1,1)
+ v2 = substr($cf,3,1)
+ if (v2 == v1+1 || v1 == v && v2 == 1) # e.g., 2,3 or 5,1
+ dbl[v1] = d
+ else if (v1 == v2+1 || v2 == v && v1 == 1) # e.g., 3,2 or 1,5
+ dbl[v2] = d
+ else
+ error(sprintf("weird %s bond in\n\t%s", d, $0))
+ }
+}
+
+function cvtdir(d) { # maps "[pointing] somewhere" to degrees
+ if ($cf == "pointing")
+ cf++
+ if ($cf ~ /^[+\-]?[0-9]+/)
+ return reduce($(cf++))
+ else if ($cf ~ /left|right|up|down|ne|nw|se|sw/)
+ return reduce(dc[$(cf++)])
+ else {
+ cf++
+ return d
+ }
+}
+
+function reduce(d) { # reduces d to 0 <= d < 360
+ while (d >= 360)
+ d -= 360
+ while (d < 0)
+ d += 360
+ return d
+}
+
+function atom(s, c, i, n, nsub, cloc, nsubc) { # convert CH3 to atom(...)
+ if (s == "\"\"")
+ return s
+ n = length(s)
+ nsub = nsubc = 0
+ cloc = index(s, "C")
+ if (cloc == 0)
+ cloc = 1
+ for (i = 1; i <= n; i++)
+ if (substr(s, i, 1) !~ /[A-Z]/) {
+ nsub++
+ if (i < cloc)
+ nsubc++
+ }
+ gsub(/([0-9]+\.[0-9]+)|([0-9]+)/, "\\s-3\\d&\\u\\s+3", s)
+ if (s ~ /([^0-9]\.)|(\.[^0-9])/) # centered dot
+ gsub(/\./, "\\v#-.3m#.\\v#.3m#", s)
+ return sprintf("atom(\"%s\", %g, %g, %g, %g, %g, %g)",
+ s, (n-nsub/2)*cwid, textht, (cloc-nsubc/2-0.5)*cwid, crh, crw, dav)
+}
+
+function in_line( i, n, s, s1, os) {
+ s = $0
+ os = ""
+ while ((n = match(s, /!?[A-Z][A-Za-z]*(([0-9]+\.[0-9]+)|([0-9]+))/)) > 0) {
+ os = os substr(s, 1, n-1) # prefix
+ s1 = substr(s, n, RLENGTH) # molecule
+ if (substr(s1, 1, 1) == "!") { # !mol => leave alone
+ s1 = substr(s1, 2)
+ } else {
+ gsub(/([0-9]+\.[0-9]+)|([0-9]+)/, "\\s-3\\d&\\u\\s+3", s1)
+ if (s1 ~ /([^0-9]\.)|(\.[^0-9])/) # centered dot
+ gsub(/\./, "\\v#-.3m#.\\v#.3m#", s1)
+ }
+ os = os s1
+ s = substr(s, n + RLENGTH) # tail
+ }
+ os = os s
+ print os
+ return
+}
+
+function shiftfields(n, i) { # move $n+1..$NF to $n..$NF-1, zap $NF
+ for (i = n; i < NF; i++)
+ $i = $(i+1)
+ $NF = ""
+ NF--
+}
+
+function fields(n1, n2, i, s) {
+ if (n1 > n2)
+ return ""
+ s = ""
+ for (i = n1; i <= n2; i++) {
+ if ($i ~ /^#/)
+ break;
+ s = s $i " "
+ }
+ return s
+}
+
+function set(a, s, i, n, q) {
+ n = split(s, q)
+ for (i = 1; i <= n; i += 2)
+ a[q[i]] = q[i+1]
+}
+
+function error(s) {
+ printf "chem\007: error on line %d: %s\n", lineno, s | "cat 1>&2"
+}
--
⑨