shithub: purgatorio

ref: a411870ee4640241e3c494367d922847da84f972
dir: purgatorio/appl/cmd/sed.b

View raw version
implement Sed;

#
# partial sed implementation borrowed from plan9 sed.
#

include "sys.m";
	sys: Sys;
include "draw.m";
include "arg.m";
	arg: Arg;
include "bufio.m";
	bufio: Bufio;
	Iobuf: import bufio;
include "string.m";
	str: String;
include "regex.m";
	regex: Regex;
	Re: import regex;

Sed : module {
	init: fn(ctxt: ref Draw->Context, argv: list of string);
};


false, true: con iota;
bool: type int;

Addr: adt {
	pick {
	None =>
	Dollar =>
	Line =>
		line: int;
	Regex =>
		re: Re;
	}
};

Sedcom: adt {
	command: fn(c: self ref Sedcom);
	executable: fn(c: self ref Sedcom) : int;

	ad1, ad2: ref Addr;
	negfl: bool;
	active: int;

	pick {
	S =>
		gfl, pfl: int;
		re: Re;
		b: ref Iobuf;
		rhs: string;
	D or CD or P or Q or EQ or G or CG or H or CH or N or CN or X or CP or L=>
	A or C or I =>
		text: string;
	R =>
		filename: string;
	W =>
		b: ref Iobuf;
	Y =>
		map: list of (int, int);
	B or T or Lab =>
		lab: string;
	}
};

dflag := false;
nflag := false;
gflag := false;
sflag := 0;

delflag := 0;
dolflag := 0;
fhead := 0;
files: list of string;
fout: ref Iobuf;
infile: ref Iobuf;
jflag := 0;
lastregex:  Re;
linebuf: string;
filename := "";
lnum := 0;
peekc := 0;

holdsp := "";
patsp := "";

cmds: list of ref Sedcom;
appendlist: list of ref Sedcom;
bufioflush: list of ref Iobuf;

init(nil: ref Draw->Context, args: list of string)
{
	sys = load Sys Sys->PATH;

	if ((arg = load Arg Arg->PATH) == nil)
		fatal(sys->sprint("could not load %s: %r", Arg->PATH));

	if ((bufio = load Bufio Bufio->PATH) == nil)
		fatal(sys->sprint("could not load %s: %r", Bufio->PATH));

	if ((str = load String String->PATH) == nil)
		fatal(sys->sprint("could not load %s: %r", String->PATH));

	if ((regex = load Regex Regex->PATH) == nil)
		fatal(sys->sprint("could not load %s: %r", Regex->PATH));

	arg->init(args);

	compfl := 0;
	while ((c := arg->opt()) != 0)
		case c {
		'n' =>
			nflag = true;
		'g' =>
			gflag = true;
		'e' =>
			if ((s := arg->arg()) == nil)
				usage();
			filename = "";
			cmds = compile(bufio->sopen(s + "\n"), cmds);
			compfl = 1;
		'f' => if ((filename = arg->arg()) == nil)
				usage();
			b := bufio->open(filename, bufio->OREAD);
			if (b == nil)
				fatal(sys->sprint("couldn't open '%s': %r", filename));
			cmds = compile(b, cmds);
			compfl = 1;
		'd' =>
			dflag = true;
		* =>
			usage();
		}
	args = arg->argv();
	if (compfl == 0) {
		if (len args == 0)
			fatal("missing pattern");
		filename = "";
		cmds = compile(bufio->sopen(hd args + "\n"), cmds);
		args = tl args;
	}

	# reverse command list, we could compile addresses here if required
	l: list of ref Sedcom;
	for (p := cmds; p != nil; p = tl p) {
		l = hd p :: l;
	}
	cmds = l;

	# add files to file list (and reverse to get in right order)
	f: list of string;
	if (len args == 0)
		f = "" :: f;
	else for (; len args != 0; args = tl args)
		f = hd args :: f;
	for (;f != nil; f = tl f)
		files = hd f :: files;

	if ((fout = bufio->fopen(sys->fildes(1), bufio->OWRITE)) == nil)
		fatal(sys->sprint("couldn't buffer stdout: %r"));
	bufioflush = fout :: bufioflush;
	lnum = 0;
	execute(cmds);
	exits(nil);
}

depth := 0;
maxdepth: con 20;
cmdend := array [maxdepth] of string;
cmdcnt := array [maxdepth] of int;

compile(b: ref Iobuf, l: list of ref Sedcom) : list of ref Sedcom
{
	lnum = 1;

nextline:
	for (;;) {
		err: int;
		(err, linebuf) = getline(b);
		if (err < 0)
			break;
		
		s := linebuf;

		do {
			rep: ref Sedcom;
			ad1, ad2: ref Addr;
			negfl := 0;

			if (s != "")
				s = str->drop(s, " \t;");

			if (s == "" || s[0] == '#')
				continue nextline;

			# read addresses
			(s, ad1) = address(s);
			pick a := ad1 {
			None =>
				ad2 = ref Addr.None();
			* =>
				if (s != "" && (s[0] == ',' || s[0] == ';')) {
					(s, ad2) = address(s[1:]);
				}
				else {
					ad2 = ref Addr.None();
				}
			}

			s = str->drop(s, " \t");

			if (s != "" && str->in(s[0], "!")) {
				negfl = true;
				s = str->drop(s, "!");
			}
			s = str->drop(s, " \t");
			if (s == "")
				break;
			c := s[0]; s = s[1:];

			# mop up commands that got two addresses but only want one.
			case c {
				'a' or 'c' or 'q' or '=' or 'i' =>
					if (tagof ad2 != tagof Addr.None)
						fatal(sys->sprint("only one address allowed:  '%s'",
						      linebuf));
			}

			case c {
			* =>
				fatal(sys->sprint("unrecognised command: '%s' (%c)",
					linebuf, c));
			'a' =>
				if (s != "" && s[0] == '\\')
					s = s[1:];
				if (s == "" || s[0] != '\n')
					fatal("unexpected characters in a command: " + s);
				rep = ref Sedcom.A (ad1, ad2, negfl, 0, s[1:]);
				s = "";
			'c' =>
				if (s != "" && s[0] == '\\')
					s = s[1:];
				if (s == "" || s[0] != '\n')
					fatal("unexpected characters in c command: " + s);
				rep = ref Sedcom.C (ad1, ad2, negfl, 0, s[1:]);
				s = "";
			'i' =>
				if (s != "" && s[0] == '\\')
					s = s[1:];
				if (s == "" || s[0] != '\n')
					fatal("unexpected characters in i command: " + s);
				rep = ref Sedcom.I (ad1, ad2, negfl, 0, s[1:]);
				s = "";
			'r' =>
				s = str->drop(s, " \t");
				rep = ref Sedcom.R (ad1, ad2, negfl, 0, s);
				s = "";
			'w' =>
				if (s != "")
					s = str->drop(s, " \t");
				if (s == "")
					fatal("no filename in w command: " + linebuf);
				bo := bufio->open(s, bufio->OWRITE);
				if (bo == nil)
					bo = bufio->create(s, bufio->OWRITE, 8r666);
				if (bo == nil)
					fatal(sys->sprint("can't create output file: '%s'", s));
				bufioflush = bo :: bufioflush;
				rep = ref Sedcom.W (ad1, ad2, negfl, 0, bo);
				s = "";
				
			'd' =>
				rep = ref Sedcom.D (ad1, ad2, negfl, 0);
			'D' =>
				rep = ref Sedcom.CD (ad1, ad2, negfl, 0);
			'p' =>
				rep = ref Sedcom.P (ad1, ad2, negfl, 0);
			'P' =>
				rep = ref Sedcom.CP (ad1, ad2, negfl, 0);
			'q' =>
				rep = ref Sedcom.Q (ad1, ad2, negfl, 0);
			'=' =>
				rep = ref Sedcom.EQ (ad1, ad2, negfl, 0);
			'g' =>
				rep = ref Sedcom.G (ad1, ad2, negfl, 0);
			'G' =>
				rep = ref Sedcom.CG (ad1, ad2, negfl, 0);
			'h' =>
				rep = ref Sedcom.H (ad1, ad2, negfl, 0);
			'H' =>
				rep = ref Sedcom.CH (ad1, ad2, negfl, 0);
			'n' =>
				rep = ref Sedcom.N (ad1, ad2, negfl, 0);
			'N' =>
				rep = ref Sedcom.CN (ad1, ad2, negfl, 0);
			'x' =>
				rep = ref Sedcom.X (ad1, ad2, negfl, 0);
			'l' =>
				rep = ref Sedcom.L (ad1, ad2, negfl, 0);
 			'y' =>
				if (s == "")
					fatal("expected args: " + linebuf);
				seof := s[0:1];
				s = s[1:];
				if (s == "")
					fatal("no lhs: " + linebuf);
				(lhs, s2) := str->splitl(s, seof);
				if (s2 == "")
					fatal("no lhs terminator: " + linebuf);
				s2 = s2[1:];
				(rhs, s4) := str->splitl(s2, seof);
				if (s4 == "")
					fatal("no rhs: " + linebuf);
				s = s4[1:];
				if (len lhs != len rhs)
					fatal("y command needs same length sets: " + linebuf);
				map: list of (int, int);
				for (i := 0; i < len lhs; i++)
					map = (lhs[i], rhs[i]) :: map;
				rep = ref Sedcom.Y (ad1, ad2, negfl, 0, map);
			's' =>
				seof := s[0:1];
				re: Re;
				(re, s) = recomp(s);
				rhs: string;
				(s, rhs) = compsub(seof + s);

				gfl := gflag;
				pfl := 0;

				if (s != "" && s[0] == 'g') {
					gfl = 1;
					s = s[1:];
				}
				if (s != "" && s[0] == 'p') {
					pfl = 1;
					s = s[1:];
				}
				if (s != "" && s[0] == 'P') {
					pfl = 2;
					s = s[1:];
				}

				b: ref Iobuf = nil;
				if (s != "" && s[0] == 'w') {
					s = s[1:];
					if (s != "")
						s = str->drop(s, " \t");
					if (s == "")
						fatal("no filename in s with w: " + linebuf);
					b = bufio->open(s, bufio->OWRITE);
					if (b == nil)
						b = bufio->create(s, bufio->OWRITE, 8r666);
					if (b == nil)
						fatal(sys->sprint("can't create output file: '%s'", s));
					bufioflush = b :: bufioflush;
					s = "";
				}
				rep = ref Sedcom.S (ad1, ad2, negfl, 0, gfl, pfl, re, b, rhs);
			':' =>
				if (s != "")
					s = str->drop(s, " \t");
				(lab, s1) := str->splitl(s, " \t;#");
				s = s1;
				if (lab == "")
					fatal(sys->sprint("null label: '%s'", linebuf));
				if (findlabel(lab))
					fatal(sys->sprint("duplicate label: '%s'", lab));
				rep = ref Sedcom.Lab (ad1, ad2, negfl, 0, lab);
			'b' or 't' =>
				if (s != "")
					s = str->drop(s, " \t");
				(lab, s1) := str->splitl(s, " \t;#");
				s = s1;
				if (c == 'b')
					rep = ref Sedcom.B (ad1, ad2, negfl, 0, lab);
				else
					rep = ref Sedcom.T (ad1, ad2, negfl, 0, lab);
			'{' =>
				# replace { with branch to }.
				lab := mklab(depth);
				depth++;
				rep = ref Sedcom.B (ad1, ad2, !negfl, 0, lab);
				s = ";" + s;
			'}' =>
				if (tagof ad1 != tagof Addr.None)
					fatal("did not expect address:" + linebuf);
				if (--depth < 0)
					fatal("too many }'s: " + linebuf);
				lab := mklab(depth);
				cmdcnt[depth]++;
				rep = ref Sedcom.Lab ( ad1, ad2, negfl, 0, lab);
				s = ";" + s;
			}

			l = rep :: l;
		} while (s != nil && str->in(s[0], ";{}"));

		if (s != nil)
			fatal("leftover junk: " + s);
	}
	return l;
}

findlabel(lab: string) : bool
{
	for (l := cmds; l != nil; l = tl l)
		pick x := hd l {
		Lab =>
			if (x.lab == lab)
				return true;
		}
	return false;
}

mklab(depth: int): string
{
	return "_" + string cmdcnt[depth] + "_" + string depth;
}

Sedcom.command(c: self ref Sedcom)
{
	pick x := c {
	S =>
		m: bool;
		(m, patsp) = substitute(x, patsp);
		if (m) {
			case x.pfl {
			0 =>
				;
			1 =>
				fout.puts(patsp + "\n");
			* =>
				l: string;
				(l, patsp) = str->splitl(patsp, "\n");
				fout.puts(l + "\n");
				break;
			}
			if (x.b != nil)
				x.b.puts(patsp + "\n");
		}
	P =>
		fout.puts(patsp + "\n");
	CP =>
		(s, nil) := str->splitl(patsp, "\n");
		fout.puts(s + "\n");
	A =>
		appendlist = c :: appendlist;
	R =>
		appendlist = c :: appendlist;
	C =>
		delflag++;
		if (c.active == 1)
			fout.puts(x.text + "\n");
	I =>
		fout.puts(x.text + "\n");
	W =>
		x.b.puts(patsp + "\n");
	G =>
		patsp = holdsp;
	CG =>
		patsp += holdsp;
	H =>
		holdsp = patsp;
	CH =>
		holdsp += patsp;
	X =>
		(holdsp, patsp) = (patsp, holdsp);
	Y =>
		# yes this is O(N²).
		for (i := 0; i < len patsp; i++)
			for (h := x.map; h != nil; h = tl h) {
				(s, d) := hd h;
				if (patsp[i] == s)
					patsp[i] = d;
			}
	D =>
		delflag++;
	CD =>
		# loose upto \n.
		(s1, s2) := str->splitl(patsp, "\n");
		if (s2 == nil)
			patsp = s1;
		else if (len s2 > 1)
			patsp = s2[1:];
		else
			patsp = "";
		jflag++;
	Q =>
		if (!nflag)
			fout.puts(patsp + "\n");
		arout();
		exits(nil);
	N =>
		if (!nflag)
			fout.puts(patsp + "\n");
		arout();
		n: int;
		(patsp, n) = gline();
		if (n < 0)
			delflag++;
	CN =>
		arout();
		(ns, n) := gline();
		if (n < 0)
			delflag++;
		patsp += "\n" + ns;
	EQ =>
		fout.puts(sys->sprint("%d\n", lnum));
	Lab =>
		# labels don't do anything.
	B =>
		jflag = true;
	T =>
		if (sflag) {
			sflag = false;
			jflag = true;
		}
	L =>
		col := 0;
		cc := 0;
		for (i := 0; i < len patsp; i++) {
			s := "";
			cc = patsp[i];
			if (cc >= 16r20 && cc < 16r7F && cc != '\n')
				s[len s] = cc;
			else
				s = trans(cc);
			for (j := 0; j < len s; j++) {
				fout.putc(s[j]);
				if (col++ > 71) {
					fout.puts("\\\n");
					col = 0;
				}
			}
		}
		if (cc == ' ')
			fout.puts("\\n");
		fout.putc('\n');
	* =>
		fatal("unhandled command");
	}
}

trans(ch: int) : string
{
	case ch {
	'\b' =>
		return "\\b";
	'\n' =>
		return "\\n";
	'\r' =>
		return "\\r";
	'\t' =>
		return "\\t";
	'\\' =>
		return "\\\\";
	* =>
		return sys->sprint("\\u%.4ux", ch);
	}
}

getline(b: ref Iobuf) : (int, string)
{
	w : string;

	lnum++;

	while ((c := b.getc()) != bufio->EOF) {
		r := c;
		if (r == '\\') {
			w[len w] = r;
			if ((c = b.getc()) == bufio->EOF)
				break;
			r = c;
		}
		else if (r == '\n')
			return (1, w);
		w[len w] = r;
	}
	return (-1, w);
}

address(s: string) : (string, ref Addr)
{
	case s[0] {
	'$' =>
		return (s[1:], ref Addr.Dollar());
	'/' =>
		(r, s1) := recomp(s);
		if (r == nil)
			r = lastregex;
		if (r == nil)
			fatal("First RE in address may not be null");
		return (s1, ref Addr.Regex(r));
	'0' to '9' =>
		(lno, ls) := str->toint(s, 10);
		if (lno == 0)
			fatal("line no 0 is illegal address");
		return (ls, ref Addr.Line(lno));
	* =>
		return (s, ref Addr.None());
	}
}

recomp(s :string) : (Re, string)
{
	expbuf := "";

	seof := s[0]; s = s[1:];
	if (s[0] == seof)
		return (nil, s[1:]); # //

	c := s[0]; s = s[1:];
	do {
		if (c == '\0' || c == '\n')
			fatal("too much text: " + linebuf);
		if (c == '\\') {
			expbuf[len expbuf] = c;
			c = s[0]; s = s[1:];
			if (c == 'n')
				c = '\n';
		}
		expbuf[len expbuf] = c;
		c = s[0]; s = s[1:];
	} while (c != seof);

	(r, err) := regex->compile(expbuf, 1);
	if (r == nil)
		fatal(sys->sprint("%s '%s'", err, expbuf));

	lastregex = r;

	return (r, s);
}

compsub(s: string): (string, string)
{
	seof := s[0];
	rhs := "";
	for (i := 1; i < len s; i++) {
		r := s[i];
		if (r == seof)
			break;
		if (r == '\\') {
			rhs[len rhs] = r;
			if(++i >= len s)
				break;
			r = s[i];
		}
		rhs[len rhs] = r;
	}
	if (i >= len s)
		fatal(sys->sprint("no closing %c in replacement text: %s", seof,  linebuf));
	return (s[i+1:], rhs);
}		

execute(l: list of ref Sedcom)
{
	for (;;) {
		n: int;

		(patsp, n) = gline();
		if (n < 0)
			break;

cmdloop:
		for (p := l; p != nil;) {
			c := hd p;
			if (!c.executable()) {
				p = tl p;
				continue;
			}

			c.command();

			if (delflag)
				break;
			if (jflag) {
				jflag = 0;
				pick x := c {
				B or T =>
					if (p == nil)
						break cmdloop;
					for (p = l; p != nil; p = tl p) {
						pick cc := hd p {
						Lab =>
							if (cc.lab == x.lab)
								continue cmdloop;
						}
					}
					break cmdloop; # unmatched branch => end of script
				* =>
					# don't branch.
				}
			}
			else
				p = tl p;
		}
		if (!nflag && !delflag)
			fout.puts(patsp + "\n");
		arout();
		delflag = 0;
	}
}

Sedcom.executable(c: self ref Sedcom) : int
{
	if (c.active) {
		if (c.active == 1)
			c.active = 2;
		pick x := c.ad2 {
		None =>
			c.active = 0;
		Dollar =>
			return !c.negfl;
		Line =>
			if (lnum <= x.line) {
				if (x.line == lnum)
					c.active = 0;
				return !c.negfl;
			}
			c.active = 0;
			return c.negfl;
		Regex =>
			if (match(x.re, patsp))
				c.active = false;
			return !c.negfl;
		}
	}
	pick x := c.ad1 {
	None =>
		return !c.negfl;
	Dollar =>
		if (dolflag)
			return !c.negfl;
	Line =>
		if (x.line == lnum) {
			c.active = 1;
			return !c.negfl;
		}
	Regex =>
		if (match(x.re, patsp)) {
			c.active = 1;
			return !c.negfl;
		}
	}
	return c.negfl;
}

arout()
{
	a: list of ref Sedcom;

	while (appendlist != nil) {
		a = hd appendlist :: a;
		appendlist = tl appendlist;
	}

	for (; a != nil; a = tl a)
		pick x := hd a {
		A =>
			fout.puts(x.text + "\n");
		R =>
			if ((b := bufio->open(x.filename, bufio->OREAD)) == nil)
				fatal(sys->sprint("couldn't open '%s'", x.filename));
			while ((c := b.getc()) != bufio->EOF)
				fout.putc(c);
			b.close();
		* =>
			fatal("unexpected command on appendlist");
		}
}

match(re: Re, s: string) : bool
{
	return re != nil && regex->execute(re, s) != nil;
}

substitute(c: ref Sedcom.S, s: string) : (bool, string)
{
	if (!match(c.re, s))
		return (false, s);
	sflag = true;
	start := 0;

	# Beware of infinite loops: 's/$/i/g', 's/a/aa/g', 's/^/a/g'
	do {
		se := (start, len s);
		if ((m := regex->executese(c.re, s, se, true, true)) == nil)
			break;
		(l, r) := m[0];
		rep := "";
		for (i := 0; i < len c.rhs; i++){
			if (c.rhs[i] != '\\'  || i+1 == len c.rhs){
				if (c.rhs[i] == '&')
					rep += s[l: r];
				else
					rep[len rep] = c.rhs[i];
			}else {
				i++;
				case c.rhs[i] {
				'0' to '9' =>
					n := c.rhs[i] - '0';
					# elide if too big
					if (n < len m) {
						(beg, end) := m[n];
						rep += s[beg:end];
					}
				'n' =>
					rep[len rep] = '\n';
				* =>
					rep[len rep] = c.rhs[i];
				}
			}
		}
		s = s[0:l] + rep + s[r:];
		start = l + len rep;
		if(r == l)
			start++;
	} while (c.gfl);
	return (true, s);
}

gline() : (string, int)
{
	if (infile == nil && opendatafile() < 0)
		return (nil, -1);

	sflag = false;
	lnum++;

	s := "";
	do {
		c := peekc;
		if (c == 0)
			c = infile.getc();
		for (; c != bufio->EOF; c = infile.getc()) {
			if (c == '\n') {
				if ((peekc = infile.getc()) == bufio->EOF)
					if (fhead == 0)
						dolflag = 1;
				return (s, 1);
			}
			s[len s] = c;
		}
		if (len s != 0) {
			peekc = bufio->EOF;
			if (fhead == 0)
				dolflag = 1;
			return (s, 1);
		}
		peekc = 0;
		infile = nil;			
	} while (opendatafile() > 0);
	infile = nil;
	return (nil, -1);
}

opendatafile() : int
{
	if (files == nil)
		return -1;
	if (hd files != nil) {
		if ((infile = bufio->open(hd files, bufio->OREAD)) == nil)
			fatal(sys->sprint("can't open '%s'", hd files));
	}
	else if ((infile = bufio->fopen(sys->fildes(0), bufio->OREAD)) == nil)
		fatal("can't buffer stdin");

	files = tl files;
	return 1;	
}

dbg(s: string)
{
	if (dflag)
		sys->print("dbg: %s\n", s);
}

usage()
{
	sys->fprint(stderr(), "usage: %s [-ngd] [-e expr] [-f file] [expr] [file...]\n",
		arg->progname());
	exits("usage");
}

fatal(s: string)
{
	f := filename;
	if (f == nil)
		f = "<stdin>";
	sys->fprint(stderr(), "%s:%d %s\n", f, lnum, s);
	exits("error");
}

exits(e: string)
{
	for(; bufioflush != nil; bufioflush = tl bufioflush)
		(hd bufioflush).flush();
	if (e != nil)
		raise "fail:" + e;
	exit;
}

stderr() : ref Sys->FD
{
	return sys->fildes(2);
}