ref: d1540c7f666e3c5d636b48c956b444205b50502d
dir: /appl/cmd/look.b/
implement Look;
#
# Copyright © 2002 Lucent Technologies Inc.
# transliteration of the Plan 9 command; subject to the Lucent Public License 1.02
# -r option added by Caerwyn Jones to print a range
#
include "sys.m";
sys: Sys;
include "draw.m";
include "bufio.m";
bufio: Bufio;
Iobuf: import bufio;
include "arg.m";
Look: module
{
init: fn(nil: ref Draw->Context, nil: list of string);
};
filename := "/lib/words";
dfile: ref Iobuf;
bout: ref Iobuf;
debug := 0;
fold, direc, exact, iflag, range: int;
rev := 1; # -1 for reverse-ordered file, not implemented
compare: ref fn(a, b: string): int;
tab := '\t';
entry: string;
word: string;
key: string;
latin_fold_tab := array[64] of {
# Table to fold latin 1 characters to ASCII equivalents
# based at Rune value 0xc0
#
# À Á Â Ã Ä Å Æ Ç
# È É Ê Ë Ì Í Î Ï
# Ð Ñ Ò Ó Ô Õ Ö ×
# Ø Ù Ú Û Ü Ý Þ ß
# à á â ã ä å æ ç
# è é ê ë ì í î ï
# ð ñ ò ó ô õ ö ÷
# ø ù ú û ü ý þ ÿ
#
'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
'd', 'n', 'o', 'o', 'o', 'o', 'o', 0,
'o', 'u', 'u', 'u', 'u', 'y', 0, 0,
'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
'd', 'n', 'o', 'o', 'o', 'o', 'o', 0,
'o', 'u', 'u', 'u', 'u', 'y', 0, 'y',
};
init(nil: ref Draw->Context, args: list of string)
{
sys = load Sys Sys->PATH;
bufio = load Bufio Bufio->PATH;
arg := load Arg Arg->PATH;
lastkey: string;
arg->init(args);
arg->setusage("look -[dfinx] [-r lastkey] [-t c] [string] [file]");
compare = acomp;
while((c := arg->opt()) != 0)
case c {
'D' =>
debug = 1;
'd' =>
direc++;
'f' =>
fold++;
'i' =>
iflag++;
'n' =>
compare = ncomp;
't' =>
tab = (arg->earg())[0];
'x' =>
exact++;
'r' =>
range++;
lastkey = rcanon(arg->earg());
* =>
arg->usage();
}
args = arg->argv();
arg = nil;
bin := bufio->fopen(sys->fildes(0), Sys->OREAD);
bout = bufio->fopen(sys->fildes(1), Sys->OWRITE);
orig: string;
if(!iflag){
if(args != nil){
orig = hd args;
args = tl args;
}else
iflag++;
}
if(args == nil){
direc++;
fold++;
}else
filename = hd args;
if(!iflag)
key = rcanon(orig);
if(debug)
sys->fprint(sys->fildes(2), "orig %s key %s %s\n", orig, key, filename);
dfile = bufio->open(filename, Sys->OREAD);
if(dfile == nil){
sys->fprint(sys->fildes(2), "look: can't open %s\n", filename);
raise "fail:no dictionary";
}
if(!iflag)
if(!locate() && !range)
raise "fail:not found";
do{
if(iflag){
bout.flush();
if((orig = bin.gets('\n')) == nil)
exit;
key = rcanon(orig);
if(!locate())
continue;
}
if(range){
if(compare(key, word) <= 0 && compare(word, lastkey) <= 0)
bout.puts(entry);
}else if(!exact || acomp(word, orig) == 0)
bout.puts(entry);
Matches:
while((entry = dfile.gets('\n')) != nil){
word = rcanon(entry);
if(range)
n := compare(word, lastkey);
else
n = compare(key, word);
if(debug)
sys->print("compare %d %q\n", n, word);
case n {
-2 =>
if(!range)
break Matches;
bout.puts(entry);
-1 =>
if(exact)
break Matches;
bout.puts(entry);
0 =>
if(!exact || acomp(word, orig) == 0)
bout.puts(entry);
* =>
break Matches;
}
}
}while(iflag);
bout.flush();
}
locate(): int
{
bot := big 0;
top := dfile.seek(big 0, 2);
mid: big;
Search:
for(;;){
mid = (top+bot)/big 2;
if(debug)
sys->fprint(sys->fildes(2), "locate %bd %bd %bd\n", top, mid, bot);
dfile.seek(mid, 0);
c: int;
do
c = dfile.getc();
while(c >= 0 && c != '\n');
mid = dfile.offset();
if((entry = dfile.gets('\n')) == nil)
break;
word = rcanon(entry);
if(debug)
sys->fprint(sys->fildes(2), "mid %bd key: %s entry: %s\n", mid, key, word);
n := compare(key, word);
if(debug)
sys->fprint(sys->fildes(2), "compare: %d\n", n);
case n {
-2 or -1 or 0 =>
if(top <= mid)
break Search;
top = mid;
1 or 2 =>
bot = mid;
}
}
if(debug)
sys->fprint(sys->fildes(2), "locate %bd %bd %bd\n", top, mid, bot);
bot = dfile.seek(big bot, 0);
while((entry = dfile.gets('\n')) != nil){
word = rcanon(entry);
if(debug)
sys->fprint(sys->fildes(2), "seekbot %bd key: %s entry: %s\n", bot, key, word);
n := compare(key, word);
if(debug)
sys->fprint(sys->fildes(2), "compare: %d\n", n);
case n {
-2 =>
return 0;
-1 =>
return !exact;
0 =>
return 1;
1 or 2 =>
;
}
}
return 0;
}
#
# acomp(s, t) returns:
# -2 if s strictly precedes t
# -1 if s is a prefix of t
# 0 if s is the same as t
# 1 if t is a prefix of s
# 2 if t strictly precedes s
#
acomp(s, t: string): int
{
if(s == t)
return 0;
l := len s;
if(l > len t)
l = len t;
cs, ct: int;
for(i := 0; i < l; i++) {
cs = s[i];
ct = t[i];
if(cs != ct)
break;
}
if(i == len s)
return -1;
if(i == len t)
return 1;
if(cs < ct)
return -2;
return 2;
}
rcanon(s: string): string
{
if(s != nil && s[len s - 1] == '\n')
s = s[0: len s - 1];
o := 0;
for(i := 0; i < len s && (r := s[i]) != tab; i++){
if(islatin1(r) && (mr := latin_fold_tab[r-16rc0]) != 0)
r = mr;
if(direc)
if(!(isalnum(r) || r == ' ' || r == '\t'))
continue;
if(fold)
if(isupper(r))
r = tolower(r);
if(r != s[o]) # avoid copying s unless necessary
s[o] = r;
o++;
}
if(o != i)
return s[0:o];
return s;
}
sgn(v: int): int
{
if(v < 0)
return -1;
if(v > 0)
return 1;
return 0;
}
ncomp(s: string, t: string): int
{
while(len s > 0 && isspace(s[0]))
s = s[1:];
while(len t > 0 && isspace(t[0]))
t = t[1:];
ssgn := tsgn := -2*rev;
if(s != nil && s[0] == '-'){
s = s[1: ];
ssgn = -ssgn;
}
if(t != nil && t[0] == '-'){
t = t[1:];
tsgn = -tsgn;
}
for(i := 0; i < len s && isdigit(s[i]); i++)
;
is := s[0:i];
js := s[i:];
for(i = 0; i < len t && isdigit(t[i]); i++)
;
it := t[0:i];
jt := t[i:];
a := 0;
i = len is;
j := len it;
if(ssgn == tsgn){
while(j > 0 && i > 0)
if((b := it[--j] - is[--i]) != 0)
a = b;
}
while(i > 0)
if(is[--i] != '0')
return -ssgn;
while(j > 0)
if(it[--i] != '0')
return tsgn;
if(a)
return sgn(a)*ssgn;
s = js;
if(len s > 0 && s[0] == '.')
s = s[1: ];
t = jt;
if(len t > 0 && t[0] == '.')
t = t[1: ];
if(ssgn == tsgn)
while((len s > 0 && isdigit(s[0])) && (len t > 0 && isdigit(t[0]))){
if(a = t[0] - s[0])
return sgn(a)*ssgn;
s = s[1:];
t = t[1:];
}
for(; len s > 0 && isdigit(s[0]); s = s[1:])
if(s[0] != '0')
return -ssgn;
for(; len t > 0 && isdigit(t[0]); t = t[1:])
if(t[0] != '0')
return tsgn;
return 0;
}
isupper(c: int): int
{
return c >= 'A' && c <= 'Z';
}
islower(c: int): int
{
return c >= 'a' && c <= 'z';
}
isalpha(c: int): int
{
return islower(c) || isupper(c);
}
islatin1(c: int): int
{
return c >= 16rC0 && c <= 16rFF;
}
isdigit(c: int): int
{
return c >= '0' && c <= '9';
}
isalnum(c: int): int
{
return isdigit(c) || islower(c) || isupper(c);
}
isspace(c: int): int
{
return c == ' ' || c == '\t' || c >= 16r0A && c <= 16r0D;
}
tolower(c: int): int
{
return c-'A'+'a';
}