shithub: riscv

Download patch

ref: 266881b45935d644caad088549cd37697a200bd0
parent: 5af61e312772ca2a42c19f40cecf40e8d2c3e800
author: Ori Bernstein <ori@eigenstate.org>
date: Mon Sep 25 18:03:42 EDT 2023

git: replace index format for ~200x speedup

his change replaces our index format with a flat file, where
each line is in the form of:

	T qid mode path
	R qid mode path
	A qid mode path

where T is a tracked file, R is a file that's flagged for
removal, and A is a path that is added but not yet committed.
Lines may be repeated, and the last line in the file is used
to represent the status of a path.

The QID is in the index so that we can short circuit reading
the full contents if the file has not been changed. The mode
is there to avoid stats and walks through the git repository.

If the qid is NOQID or the mode is 0, then the git repository
contents are consulted to decide if the file is the same.

Tested on the 9front repo:

	Old walk:

		@{cd / && time /sys/src/cmd/git/6.owalk -q}
		0.08u 1.73s 2.82r 	 /sys/src/cmd/git/6.owalk -q  # status= DM

	New walk:

		@{cd / && time git/walk -q}
		0.05u 0.02s 0.12r 	 git/walk -q  # status= RM

--- a/sys/src/cmd/git/add
+++ b/sys/src/cmd/git/add
@@ -7,32 +7,17 @@
 flagfmt='r:remove'; args='file ...'
 eval `''{aux/getflags $*} || exec aux/usage
 
-add='tracked'
-del='removed'
-if(~ $remove 1){
-	add='removed'
-	del='tracked'
-}
+s=A
+if(~ $remove 1)
+	s=R
 if(~ $#* 0)
 	exec aux/usage
 
 paths=`$nl{cleanname -d $gitrel $* | drop $gitroot}
-if(~ $add tracked)
+if(~ $s A)
 	files=`$nl{walk -f ./$paths}
 if not
-	files=`$nl{cd .git/index9/tracked/ && walk -f ./$paths}
-
-for(f in $files){
-	if(! ~ `$nl{cleanname $f} .git/*){
-		addpath=.git/index9/$add/$f
-		delpath=.git/index9/$del/$f
-		mkdir -p `$nl{basename -d $addpath}
-		mkdir -p `$nl{basename -d $delpath}
-		# We don't want a matching qid, so that
-		# git/walk doesn't think this came from
-		# a checkout.
-		echo -n > $addpath
-		rm -f $delpath
-	}
-}
+	files=`$nl{cd .git/fs/HEAD/tree && walk -f ./$paths}
+for(f in $files)
+	echo $s NOQID 0 $f >> .git/INDEX9
 exit ''
--- a/sys/src/cmd/git/branch
+++ b/sys/src/cmd/git/branch
@@ -4,7 +4,7 @@
 
 gitup
 
-flagfmt='a:listall, b:baseref ref, d:delete, n:newbr, s:stay, m:merge'
+flagfmt='a:listall, b:baseref ref, d:delete, n:newbr, s:stay, m:merge, M:nomod'
 args='[branch]'
 eval `''{aux/getflags $*} || exec aux/usage
 
@@ -84,7 +84,6 @@
 for(m in $cleanpaths){
 	d=`$nl{basename -d $m}
 	mkdir -p $d
-	mkdir -p .git/index9/tracked/$d
 	# Modifications can turn a file into
 	# a directory, or vice versa, so we
 	# need to delete and copy the files
@@ -97,11 +96,11 @@
 		b=file
 	if(! ~ $a $b){
 		rm -rf $m
-		rm -rf .git/index9/tracked/$m
+		echo R NOQID 0 $m >> .git/INDEX9
 	}
 	if(~ $b file){
 		cp -x -- $basedir/tree/$m $m
-		walk -eq $m > .git/index9/tracked/$m
+		echo T NOQID 0 $m >> .git/INDEX9
 		touch $m
 	}
 }
@@ -112,9 +111,9 @@
 	merge1 $ours $ours $common $theirs
 }
 
-if(! ~ $#deleted 0){
-	rm -f $deleted
-	rm -f .git/index9/tracked/$deleted
+for(d in $deleted){
+	rm -f $d
+	echo R NOQID 0 $d >> .git/INDEX9
 }
 
 echo ref: $new > .git/HEAD
--- a/sys/src/cmd/git/clone
+++ b/sys/src/cmd/git/clone
@@ -87,11 +87,8 @@
 		git/fs
 		@ {builtin cd $tree && tar cif /fd/1 .} | @ {tar xf /fd/0} \
 			|| die 'checkout failed:' $status
-		for(f in `$nl{walk -f $tree | drop $tree}){
-			idx=.git/index9/tracked/$f
-			mkdir -p `$nl{basename -d $idx}
-			walk -eq ./$f > $idx
-		}
+		{for(f in `$nl{cd $tree && walk -f})
+			echo 'T NOQID 0 '$f} > .git/INDEX9
 	}
 	if not{
 		echo no default branch >[1=2]
--- a/sys/src/cmd/git/commit
+++ b/sys/src/cmd/git/commit
@@ -65,8 +65,8 @@
 fn parents{
 	if(! ~ $#revise 0)
 		parents=`{cat $gitfs/HEAD/parent}
-	if not if(test -f .git/index9/merge-parents)
-		parents=`{cat .git/index9/merge-parents | sort | uniq}
+	if not if(test -f .git/merge-parents)
+		parents=`{cat .git/merge-parents | sort | uniq}
 	if not if(~ $initial true)
 		parents=()
 	if not
@@ -78,7 +78,7 @@
 	if(! ~ $#parents 0)
 		pflags='-p'^$parents
 	hash=`{git/save -n $"name -e $"email -m $"msg $pflags $files || die $status}
-	rm -f .git/index9/merge-parents
+	rm -f .git/merge-parents
 }
 
 fn update{
@@ -89,14 +89,10 @@
 	echo $branch: $hash
 	echo $hash > $refpath
 	for(f in $files){
-		if(test -e .git/index9/removed/$f || ! test -e $f){
-			rm -f .git/index9/removed/$f
-			rm -f .git/index9/tracked/$f
-		}
-		if not{
-			mkdir -p `{basename -d $f}
-			walk -eq $f > .git/index9/tracked/$f
-		}
+		if(! test -e $f && ! test -e .git/object/$hash/tree/$f)
+			echo R NOQID 0 $f >> .git/INDEX9
+		if not
+			echo T NOQID 0 $f >> .git/INDEX9
 	}
 }
 
@@ -120,12 +116,12 @@
 }
 
 files=()
-if(test -f .git/index9/merge-parents)
-	files=`$nl{git/query -c `{cat .git/index9/merge-parents} | sed 's/^..//'}
+if(test -f .git/merge-parents)
+	files=`$nl{git/query -c `{cat .git/merge-parents} | sed 's/^..//'}
 if(! ~ $#* 0)
 	files=($files `$nl{git/walk -c `$nl{cleanname -d $gitrel $*}})
-if(~ $status '' || ~ $#files 0 && ! test -f .git/index9/merge-parents && ~ $#revise 0)
-	die 'nothing to commit' $status
+if(~ $status '' || ~ $#files 0 && ! test -f .git/merge-parents && ~ $#revise 0)
+	die 'nothing to commit'
 @{
 	flag e +
 	whoami
--- a/sys/src/cmd/git/conf.c
+++ b/sys/src/cmd/git/conf.c
@@ -59,7 +59,7 @@
 main(int argc, char **argv)
 {
 	char repo[512], *p, *s;
-	int i, j;
+	int i, j, nrel;
 
 	ARGBEGIN{
 	case 'f':	file[nfile++]=EARGF(usage());	break;
@@ -69,7 +69,7 @@
 	}ARGEND;
 
 	if(findroot){
-		if(findrepo(repo, sizeof(repo)) == -1)
+		if(findrepo(repo, sizeof(repo), &nrel) == -1)
 			sysfatal("%r");
 		print("%s\n", repo);
 		exits(nil);
--- a/sys/src/cmd/git/diff
+++ b/sys/src/cmd/git/diff
@@ -4,19 +4,24 @@
 
 gitup
 
-flagfmt='c:commit branch, s:summarize'; args='[file ...]'
+flagfmt='c:commit branch, s:summarize, u:uncommitted'; args='[file ...]'
 eval `''{aux/getflags $*} || exec aux/usage
 
-if(~ $#commit 0)
+if(~ $#commit 0){
 	commit=HEAD
+	cparam=()
+}
 
 files=()
+filt=MAR
+if(~ $#uncommitted 1)
+	filt=MARU
 if(! ~ $#* 0)
 	files=`{cleanname -d $gitrel $*}
 
 branch=`{git/query -p $commit}
 if(~ $summarize 1){
-	git/walk -fMAR -b $commit $files
+	git/walk -f$filt $cparam $files
 	exit
 }
 
@@ -24,7 +29,7 @@
 mntgen /mnt/scratch
 bind $branch/tree/ /mnt/scratch/a
 bind . /mnt/scratch/b
-for(f in `$nl{git/walk -c -fRMA -b $commit $files}){
+for(f in `$nl{git/walk -c -f$filt $cparam $files}){
 	if(~ $#showed 0){
 		echo diff `{git/query $commit} uncommitted
 		showed=1
--- a/sys/src/cmd/git/git.h
+++ b/sys/src/cmd/git/git.h
@@ -21,6 +21,7 @@
 typedef struct Dblock	Dblock;
 typedef struct Objq	Objq;
 typedef struct Qelt	Qelt;
+typedef struct Idxent	Idxent;
 
 enum {
 	Pathmax		= 512,
@@ -190,6 +191,13 @@
 	int	len;
 };
 
+struct Idxent {
+	char	*path;
+	Qid	qid;
+	int	mode;
+	int	order;
+	char	state;
+};
 
 #define GETBE16(b)\
 		((((b)[0] & 0xFFul) <<  8) | \
@@ -301,9 +309,10 @@
 int	hassuffix(char *, char *);
 int	swapsuffix(char *, int, char *, char *, char *);
 char	*strip(char *);
-int	findrepo(char *, int);
+int	findrepo(char *, int, int*);
 int	showprogress(int, int);
 u64int	murmurhash2(void*, usize);
+Qid	parseqid(char*);
 
 /* packing */
 void	dtinit(Dtab *, Object*);
--- a/sys/src/cmd/git/import
+++ b/sys/src/cmd/git/import
@@ -98,7 +98,8 @@
 		if(~ $#nocommit 0){
 			if(hash=`{git/save -n $aname -e $amail -N $name -E $email -m $msg -d $date $parents $files}){
 				echo $hash > $refpath
-				rm -f .git/index9/removed/$files
+				for(f in $files)
+					echo T NOQID 0 $f >> .git/INDEX9
 			}
 		}
 		status=''''
--- a/sys/src/cmd/git/init
+++ b/sys/src/cmd/git/init
@@ -31,7 +31,7 @@
 	echo '[branch "'$branch'"]'
 	echo '	remote = origin'
 }
-
+>$dir/.git/INDEX9
 >$dir/.git/HEAD {
 	echo ref: refs/heads/$branch
 }
--- a/sys/src/cmd/git/log.c
+++ b/sys/src/cmd/git/log.c
@@ -239,7 +239,7 @@
 main(int argc, char **argv)
 {
 	char path[1024], repo[1024], *p, *r;
-	int i, nrepo;
+	int i, nrel, nrepo;
 
 	ARGBEGIN{
 	case 'e':
@@ -259,7 +259,7 @@
 		break;
 	}ARGEND;
 
-	if(findrepo(repo, sizeof(repo)) == -1)
+	if(findrepo(repo, sizeof(repo), &nrel) == -1)
 		sysfatal("find root: %r");
 	nrepo = strlen(repo);
 	if(argc != 0){
--- a/sys/src/cmd/git/merge
+++ b/sys/src/cmd/git/merge
@@ -38,8 +38,8 @@
 	git/revert .
 	exit ''
 }
-echo $ours >> .git/index9/merge-parents
-echo $theirs >> .git/index9/merge-parents
+echo $ours >> .git/merge-parents
+echo $theirs >> .git/merge-parents
 
 merge $ours $base $theirs
 >[1=2] echo 'merge complete: remember to commit'
--- a/sys/src/cmd/git/pack.c
+++ b/sys/src/cmd/git/pack.c
@@ -66,7 +66,7 @@
 Object *lruhead;
 Object *lrutail;
 vlong	ncache;
-vlong	cachemax = 512*MiB;
+vlong	cachemax = 1024*MiB;
 Packf	*packf;
 int	npackf;
 int	openpacks;
--- a/sys/src/cmd/git/query.c
+++ b/sys/src/cmd/git/query.c
@@ -152,10 +152,10 @@
 void
 main(int argc, char **argv)
 {
-	int i, j, n;
-	Hash *h;
-	char *p, *e, *s, *objpfx;
 	char query[2048], repo[512];
+	char *p, *e, *s, *objpfx;
+	int i, j, n, nrel;
+	Hash *h;
 
 	ARGBEGIN{
 	case 'd':	chattygit++;	break;
@@ -170,7 +170,7 @@
 
 	if(argc == 0)
 		usage();
-	if(findrepo(repo, sizeof(repo)) == -1)
+	if(findrepo(repo, sizeof(repo), &nrel) == -1)
 		sysfatal("find root: %r");
 	if(chdir(repo) == -1)
 		sysfatal("chdir: %r");
--- a/sys/src/cmd/git/save.c
+++ b/sys/src/cmd/git/save.c
@@ -10,6 +10,7 @@
 	char *dat;
 	int ndat;
 };
+
 enum {
 	Maxparents = 16,
 };
@@ -21,7 +22,9 @@
 char	*commitmsg;
 Hash	parents[Maxparents];
 int	nparents;
-
+Idxent	*idx;
+int	idxsz;
+int	nidx;
 int
 gitmode(Dirent *e)
 {
@@ -38,6 +41,20 @@
 }
 
 int
+idxcmp(void *pa, void *pb)
+{
+	Idxent *a, *b;
+	int c;
+
+	a = (Idxent*)pa;
+	b = (Idxent*)pb;
+	if((c = strcmp(a->path, b->path)) != 0)
+		return c;
+	assert(a->order != b->order);
+	return a-> order < b->order ? -1 : 1;
+}
+
+int
 entcmp(void *pa, void *pb)
 {
 	char abuf[256], bbuf[256], *ae, *be;
@@ -184,27 +201,22 @@
 int
 tracked(char *path)
 {
-	char ipath[256];
-	Dir *d;
+	int lo, hi, mid, r;
 
-	/* Explicitly removed. */
-	snprint(ipath, sizeof(ipath), ".git/index9/removed/%s", path);
-	if(strstr(cleanname(ipath), ".git/index9/removed") != ipath)
-		sysfatal("path %s leaves index", ipath);
-	d = dirstat(ipath);
-	if(d != nil && d->qid.type != QTDIR){
-		free(d);
-		return 0;
+	r = -1;
+	lo = 0;
+	hi = nidx-1;
+	while(lo <= hi){
+		mid = (hi + lo) / 2;
+		r = strcmp(path, idx[mid].path);
+		if(r < 0)
+			hi = mid-1;
+		else if(r > 0)
+			lo = mid+1;
+		else
+			break;
 	}
-
-	/* Explicitly added. */
-	snprint(ipath, sizeof(ipath), ".git/index9/tracked/%s", path);
-	if(strstr(cleanname(ipath), ".git/index9/tracked") != ipath)
-		sysfatal("path %s leaves index", ipath);
-	if(access(ipath, AEXIST) == 0)
-		return 1;
-
-	return 0;
+	return r == 0 && idx[mid].state != 'R';
 }
 
 int
@@ -354,10 +366,11 @@
 void
 main(int argc, char **argv)
 {
+	char *ln, *dstr, *parts[4], cwd[1024];
+	int i, r, line, ncwd;
 	Hash th, ch;
-	char *dstr, cwd[1024];
-	int i, r, ncwd;
 	vlong date;
+	Biobuf *f;
 	Object *t;
 
 	gitinit();
@@ -425,6 +438,33 @@
 	}
 
 	t = findroot();
+	nidx = 0;
+	idxsz = 32;
+	idx = emalloc(idxsz*sizeof(Idxent));
+	if((f = Bopen(".git/INDEX9", OREAD)) == nil)
+		sysfatal("open index: %r");
+	line = 0;
+	while((ln = Brdstr(f, '\n', 1)) != nil){
+		line++;
+		if(ln[0] == 0 || ln[0] == '\n')
+			continue;
+		if(getfields(ln, parts, nelem(parts), 0, " \t") != nelem(parts))
+			sysfatal(".git/INDEX9:%d: corrupt index", line);
+		if(nidx == idxsz){
+			idxsz += idxsz/2;
+			idx = realloc(idx, idxsz*sizeof(Idxent));
+		}
+		cleanname(parts[3]);
+		idx[nidx].state = *parts[0];
+		idx[nidx].qid = parseqid(parts[1]);
+		idx[nidx].mode = strtol(parts[2], nil, 8);
+		idx[nidx].path = strdup(parts[3]);
+		idx[nidx].order = nidx;
+		nidx++;
+		free(ln);
+	}
+	Bterm(f);
+	qsort(idx, nidx, sizeof(Idxent), idxcmp);
 	r = treeify(t, argv, argv + argc, 0, &th);
 	if(r == -1)
 		sysfatal("could not commit: %r\n");
--- a/sys/src/cmd/git/test/basic.rc
+++ b/sys/src/cmd/git/test/basic.rc
@@ -64,4 +64,3 @@
 q diff -c scratch/upstream/file.txt scratch/downstream/file.txt || die mismatch
 ! test -e scratch/upstream/file2.txt || die mismatch
 ! test -e scratch/downstream/file2.txt || die mismatch
-
--- a/sys/src/cmd/git/test/util.rc
+++ b/sys/src/cmd/git/test/util.rc
@@ -1,5 +1,5 @@
 fn q {
-	{$* > /tmp/out.$pid && rm /tmp/out.$pid} || cat /tmp/out
+	{$* > /tmp/out.$pid && rm /tmp/out.$pid} || cat /tmp/out.$pid
 }
 fn qq {
 	$* >/dev/null >[2]/dev/null
--- a/sys/src/cmd/git/util.c
+++ b/sys/src/cmd/git/util.c
@@ -192,8 +192,10 @@
 	Qid q;
 
 	q = va_arg(fmt->args, Qid);
-	return fmtprint(fmt, "Qid{path=0x%llx(dir:%d,obj:%lld), vers=%ld, type=%d}",
-	    q.path, QDIR(&q), (q.path >> 8), q.vers, q.type);
+	if(q.path == ~0ULL && q.vers == ~0UL && q.type == 0xff)
+		return fmtprint(fmt, "NOQID");
+	else
+		return fmtprint(fmt, "%llux.%lud.%hhx", q.path, q.vers, q.type);
 }
 
 void
@@ -294,7 +296,7 @@
 
 /* Finds the directory containing the git repo. */
 int
-findrepo(char *buf, int nbuf)
+findrepo(char *buf, int nbuf, int *nrel)
 {
 	char *p, *suff;
 
@@ -302,6 +304,7 @@
 	if(getwd(buf, nbuf - strlen(suff) - 1) == nil)
 		return -1;
 
+	*nrel = 0;
 	for(p = buf + strlen(buf); p != nil; p = strrchr(buf, '/')){
 		strcpy(p, suff);
 		if(access(buf, AEXIST) == 0){
@@ -308,6 +311,7 @@
 			p[p == buf] = '\0';
 			return 0;
 		}
+		*nrel += 1;
 		*p = '\0';
 	}
 	werrstr("not a git repository");
@@ -441,4 +445,25 @@
 	h ^= h >> 15;
 
 	return h;
+}
+
+Qid
+parseqid(char *s)
+{
+	char *e;
+	Qid q;
+
+	if(strcmp(s, "NOQID") == 0)
+		return (Qid){-1, -1, -1};		
+	e = s;
+	q.path = strtoull(e, &e, 16);
+	if(*e != '.')
+		sysfatal("corrupt qid: %s (%s)\n", s, e);
+	q.vers = strtoul(e+1, &e, 10);
+	if(*e != '.')
+		sysfatal("corrupt qid: %s (%s)\n", s, e);
+	q.type = strtoul(e+1, &e, 16);
+	if(*e != '\0')
+		sysfatal("corrupt qid: %s (%x)\n", s, *e);
+	return q;
 }
--- a/sys/src/cmd/git/walk.c
+++ b/sys/src/cmd/git/walk.c
@@ -2,53 +2,72 @@
 #include <libc.h>
 #include "git.h"
 
+typedef struct Seen	Seen;
+typedef struct Idxed	Idxed;
+typedef struct Idxent	Idxent;
+
 #define NCACHE 4096
-#define TDIR ".git/index9/tracked"
-#define RDIR ".git/index9/removed"
-typedef struct Cache	Cache;
-typedef struct Wres	Wres;
-struct Cache {
+
+enum {
+	Rflg	= 1 << 0,
+	Mflg	= 1 << 1,
+	Aflg	= 1 << 2,
+	Uflg	= 1 << 3,
+	/* everything after this is not an error */
+	Tflg	= 1 << 4,
+};
+
+struct Seen {
 	Dir*	cache;
 	int	n;
 	int	max;
 };
 
-struct Wres {
-	char	**path;
-	int	npath;
-	int	pathsz;
+struct Idxed {
+	char**	cache;
+	int	n;
+	int	max;
 };
 
-enum {
-	Rflg	= 1 << 0,
-	Mflg	= 1 << 1,
-	Aflg	= 1 << 2,
-	Tflg	= 1 << 3,
-};
+Seen	seentab[NCACHE];
+Idxed	idxtab[NCACHE];
+char	repopath[1024];
+char	wdirpath[1024];
+char	*rstr	= "R ";
+char	*mstr	= "M ";
+char	*astr	= "A ";
+char	*ustr	= "U ";
+char	*tstr 	= "T ";
+char	*bdir = ".git/fs/HEAD/tree";
+int	useidx	= 1;
+int	nrel;
+int	quiet;
+int	dirty;
+int	printflg;
 
-Cache seencache[NCACHE];
-int quiet;
-int useindex;
-int printflg;
-char *bdir = ".git/fs/HEAD/tree";
-char *rstr = "R ";
-char *tstr = "T ";
-char *mstr = "M ";
-char *astr = "A ";
+Idxent	*idx;
+int	idxsz;
+int	nidx;
+int	staleidx;
+Idxent	*wdir;
+int	wdirsz;
+int	nwdir;
 
+int	loadwdir(char*);
+
 int
 seen(Dir *dir)
 {
+	Seen *c;
 	Dir *dp;
 	int i;
-	Cache *c;
 
-	c = &seencache[dir->qid.path&(NCACHE-1)];
+	c = &seentab[dir->qid.path&(NCACHE-1)];
 	dp = c->cache;
 	for(i=0; i<c->n; i++, dp++)
-		if(dir->qid.path == dp->qid.path &&
-		   dir->type == dp->type &&
-		   dir->dev == dp->dev)
+		if(dir->qid.path == dp->qid.path
+		&& dir->qid.type == dp->qid.type
+		&& dir->dev == dp->dev)
 			return 1;
 	if(c->n == c->max){
 		if (c->max == 0)
@@ -63,129 +82,115 @@
 	return 0;
 }
 
-void
-grow(Wres *r)
-{
-	if(r->npath == r->pathsz){
-		r->pathsz = 2*r->pathsz + 1;
-		r->path = erealloc(r->path, r->pathsz * sizeof(char*));
-	}
-}
-
 int
-readpaths(Wres *r, char *pfx, char *dir)
+checkedin(Idxent *e, int change)
 {
-	char *f, *sub, *full, *sep;
-	Dir *d;
-	int fd, ret, i, n;
+	char *p;
+	int r;
 
-	d = nil;
-	ret = -1;
-	sep = "";
-	if(dir[0] != 0)
-		sep = "/";
-	if((full = smprint("%s/%s", pfx, dir)) == nil)
-		sysfatal("smprint: %r");
-	if((fd = open(full, OREAD)) < 0)
-		goto error;
-	while((n = dirread(fd, &d)) > 0){
-		for(i = 0; i < n; i++){
-			if(seen(&d[i]))
-				continue;
-			if(d[i].qid.type & QTDIR){
-				if((sub = smprint("%s%s%s", dir, sep, d[i].name)) == nil)
-					sysfatal("smprint: %r");
-				if(readpaths(r, pfx, sub) == -1){
-					free(sub);
-					goto error;
-				}
-				free(sub);
-			}else{
-				grow(r);
-				if((f = smprint("%s%s%s", dir, sep, d[i].name)) == nil)
-					sysfatal("smprint: %r");
-				r->path[r->npath++] = f;
-			}
-		}
-		free(d);
+	p = smprint("%s/%s", bdir, e->path);
+	r = access(p, AEXIST);
+	if(r == 0 && change){
+		if(e->state != 'R')
+			e->state = 'T';
+		staleidx = 1;
 	}
-	ret = r->npath;
-error:
-	close(fd);
-	free(full);
-	return ret;
+	free(p);
+	return r == 0;
 }
 
 int
-cmp(void *pa, void *pb)
+indexed(char *path, int isdir)
 {
-	return strcmp(*(char **)pa, *(char **)pb);
-}
+	int lo, hi, mid, n, r;
+	char *s;
 
-void
-dedup(Wres *r)
-{
-	int i, o;
-
-	if(r->npath <= 1)
-		return;
-	o = 0;
-	qsort(r->path, r->npath, sizeof(r->path[0]), cmp);
-	for(i = 1; i < r->npath; i++)
-		if(strcmp(r->path[o], r->path[i]) != 0)
-			r->path[++o] = r->path[i];
-	r->npath = o + 1;
+	if(!useidx){
+		s = smprint("%s/%s", bdir, path);
+		r = access(s, AEXIST);
+		free(s);
+		return r == 0;
+	}
+	s = path;
+	if(isdir)
+		s = smprint("%s/", path);
+	r = -1;
+	lo = 0;
+	hi = nidx-1;
+	n = strlen(s);
+	while(lo <= hi){
+		mid = (hi + lo) / 2;
+		if(isdir)
+			r = strncmp(s, idx[mid].path, n);
+		else
+			r = strcmp(s, idx[mid].path);
+		if(r < 0)
+			hi = mid-1;
+		else if(r > 0)
+			lo = mid+1;
+		else
+			break;
+	}
+	if(isdir)
+		free(s);
+	return r == 0;
 }
 
 int
-sameqid(Dir *d, char *qf)
+idxcmp(void *pa, void *pb)
 {
-	char indexqid[64], fileqid[64], *p;
-	int fd, n;
+	Idxent *a, *b;
+	int c;
 
-	if(!d)
-		return 0;
-	if((fd = open(qf, OREAD)) == -1)
-		return 0;
-	if((n = readn(fd, indexqid, sizeof(indexqid) - 1)) == -1)
-		return 0;
-	indexqid[n] = 0;
-	close(fd);
-	if((p = strpbrk(indexqid, "  \t\n\r")) != nil)
-		*p = 0;
-
-	snprint(fileqid, sizeof(fileqid), "%ullx.%uld.%.2uhhx",
-		d->qid.path, d->qid.vers, d->qid.type);
-
-	if(strcmp(indexqid, fileqid) == 0)
-		return 1;
-	return 0;
+	a = (Idxent*)pa;
+	b = (Idxent*)pb;
+	if((c = strcmp(a->path, b->path)) != 0)
+		return c;
+	/* order is unique */
+	return a-> order < b->order ? -1 : 1;
 }
 
-void
-writeqid(Dir *d, char *qf)
-{
-	int fd;
-
-	if((fd = create(qf, OWRITE, 0666)) == -1)
-		return;
-	fprint(fd, "%ullx.%uld.%.2uhhx\n",
-		d->qid.path, d->qid.vers, d->qid.type);
-	close(fd);
-}
-
+/*
+ * compares whether the indexed entry 'a'
+ * has the same contents and mode as
+ * the entry on disk 'b'; if the indexed
+ * entry is nil, does a deep comparison
+ * of the checked out file and the file
+ * checked in.
+ */
 int
-samedata(char *pa, char *pb)
+samedata(Idxent *a, Idxent *b)
 {
-	char ba[32*1024], bb[32*1024];
+	char *gitpath, ba[IOUNIT], bb[IOUNIT];
 	int fa, fb, na, nb, same;
+	Dir *da, *db;
 
+	if(a != nil){
+		if(a->qid.path == b->qid.path
+		&& a->qid.vers == b->qid.vers
+		&& a->qid.type == b->qid.type
+		&& a->mode == b->mode
+		&& a->mode != 0)
+			return 1;
+	}
+
 	same = 0;
-	fa = open(pa, OREAD);
-	fb = open(pb, OREAD);
-	if(fa == -1 || fb == -1){
+	da = nil;
+	db = nil;
+	if((gitpath = smprint("%s/%s", bdir, b->path)) == nil)
+		sysfatal("smprint: %r");
+	fa = open(gitpath, OREAD);
+	fb = open(b->path, OREAD);
+	if(fa == -1 || fb == -1)
 		goto mismatch;
-	}
+	da = dirfstat(fa);
+	db = dirfstat(fb);
+	if(da == nil || db == nil)
+		goto mismatch;
+	if(da->mode != db->mode)
+		goto mismatch;
+	if(da->length != db->length)
+		goto mismatch;
 	while(1){
 		if((na = readn(fa, ba, sizeof(ba))) == -1)
 			goto mismatch;
@@ -198,8 +203,16 @@
 		if(memcmp(ba, bb, na) != 0)
 			goto mismatch;
 	}
+	if(a != nil){
+		a->qid = db->qid;
+		a->mode = db->mode;
+		staleidx = 1;
+	}
 	same = 1;
+
 mismatch:
+	free(da);
+	free(db);
 	if(fa != -1)
 		close(fa);
 	if(fb != -1)
@@ -207,7 +220,129 @@
 	return same;
 }
 
+int
+loadent(char *dir, Dir *d, int fullpath)
+{
+	char *path;
+	int ret, isdir;
+	Idxent *e;
+
+	if(fullpath)
+		path = strdup(dir);
+	else
+		path = smprint("%s/%s", dir, d->name);
+	if(path == nil)
+		sysfatal("smprint: %r");
+
+	ret = 0;
+	isdir = d->qid.type & QTDIR;
+	cleanname(path);
+	if((printflg & Uflg) == 0 && !indexed(path, isdir))
+		return 0;
+	if(isdir){
+		ret = loadwdir(path);
+		free(path);
+	}else{
+		if(nwdir == wdirsz){
+			wdirsz += wdirsz/2;
+			wdir = erealloc(wdir, wdirsz*sizeof(Idxent));
+		}
+		e = wdir + nwdir;
+		e->path = path;
+		e->qid = d->qid;
+		e->mode = d->mode;
+		e->order = nwdir;
+		e->state = 'T';
+		nwdir++;
+	}
+	return ret;
+}
+
+int
+loadwdir(char *path)
+{
+	int fd, ret, i, n;
+	Dir *d, *e;
+
+	d = nil;
+	e = nil;
+	ret = -1;
+
+	cleanname(path);
+	if(strncmp(path, ".git/", 5) == 0)
+		return 0;
+	if((fd = open(path, OREAD)) < 0)
+		goto error;
+	if((e = dirfstat(fd)) == nil)
+		sysfatal("fstat: %r");
+	if(e->qid.type & QTDIR)
+		while((n = dirread(fd, &d)) > 0){
+			for(i = 0; i < n; i++)
+				if(loadent(path, &d[i], 0) == -1)
+					goto error;
+			free(d);
+		}
+	else{
+		if(loadent(path, e, 1) == -1)
+			goto error;
+	}
+	ret = 0;
+error:
+	free(e);
+	if(fd != -1)
+		close(fd);
+	return ret;
+}
+
+int
+pfxmatch(char *p, char **pfx, int *pfxlen, int npfx)
+{
+	int i;
+
+	if(npfx == 0)
+		return 1;
+	for(i = 0; i < npfx; i++){
+		if(strncmp(p, pfx[i], pfxlen[i]) != 0)
+			continue;
+		if(p[pfxlen[i]] == '/' || p[pfxlen[i]] == 0)
+			return 1;
+		if(strcmp(pfx[i], ".") == 0 || *pfx[i] == 0)
+			return 1;
+	}
+	return 0;
+}
+
+
+char*
+reporel(char *s)
+{
+	char *p;
+	int n;
+
+	if(*s == '/')
+		s = strdup(s);
+	else
+		s = smprint("%s/%s", wdirpath, s);
+	p = cleanname(s);
+	n = strlen(repopath);
+	if(strncmp(s, repopath, n) != 0)
+		sysfatal("path outside repo: %s", s);
+	p += n;
+	if(*p == '/')
+		p++;
+	memmove(s, p, strlen(p)+1);
+	return s;
+}
+
 void
+show(Biobuf *o, int flg, char *str, char *path)
+{
+	dirty |= flg;
+	if(!quiet && (printflg & flg))
+		Bprint(o, "%s%s\n", str, path);
+}
+
+void
 usage(void)
 {
 	fprint(2, "usage: %s [-qbc] [-f filt] [-b base] [paths...]\n", argv0);
@@ -217,14 +352,21 @@
 void
 main(int argc, char **argv)
 {
-	char *rpath, *tpath, *bpath, *base, buf[8], repo[512];
-	char *p, *e;
-	int i, dirty;
-	Wres r;
-	Hash h;
-	Dir *d;
+	char *p, *e, *ln, *base, **argrel, *parts[4], xbuf[8];
+	int i, j, c, line, wfd, *argn;
+	Biobuf *f, *o, *w;
+	Hash h, hd;
+	Dir rn;
 
 	gitinit();
+	if(access(".git/fs/ctl", AEXIST) != 0)
+		sysfatal("no running git/fs");
+	if(getwd(wdirpath, sizeof(wdirpath)) == nil)
+		sysfatal("getwd: %r");
+	if(findrepo(repopath, sizeof(repopath), &nrel) == -1)
+		sysfatal("find root: %r");
+	if(chdir(repopath) == -1)
+		sysfatal("chdir: %r");
 
 	ARGBEGIN{
 	case 'q':
@@ -235,6 +377,7 @@
 		tstr = "";
 		mstr = "";
 		astr = "";
+		ustr = "";
 		break;
 	case 'f':
 		for(p = EARGF(usage()); *p; p++)
@@ -243,109 +386,172 @@
 			case 'A':	printflg |= Aflg;	break;
 			case 'M':	printflg |= Mflg;	break;
 			case 'R':	printflg |= Rflg;	break;
+			case 'U':	printflg |= Uflg;	break;
 			default:	usage();		break;
 		}
 		break;
 	case 'b':
-		useindex = 0;
+		useidx = 0;
 		base = EARGF(usage());
 		if(resolveref(&h, base) == -1)
 			sysfatal("no such ref '%s'", base);
+		/* optimization: we're a lot faster when using the index */
+		if(resolveref(&hd, "HEAD") == 0 && hasheq(&h, &hd))
+			useidx = 1;
 		bdir = smprint(".git/fs/object/%H/tree", h);
 		break;
 	default:
 		usage();
-	}ARGEND
+	}ARGEND;
 
-	gitinit();
-	if(findrepo(repo, sizeof(repo)) == -1)
-		sysfatal("find root: %r");
-	if(chdir(repo) == -1)
-		sysfatal("chdir: %r");
-	if(access(".git/fs/ctl", AEXIST) != 0)
-		sysfatal("no running git/fs");
-	dirty = 0;
-	memset(&r, 0, sizeof(r));
 	if(printflg == 0)
 		printflg = Tflg | Aflg | Mflg | Rflg;
-	if(argc == 0){
-		if(access(TDIR, AEXIST) == 0 && readpaths(&r, TDIR, "") == -1)
-			sysfatal("read tracked: %r");
-		if(access(RDIR, AEXIST) == 0 && readpaths(&r, RDIR, "") == -1)
-			sysfatal("read removed: %r");
-		if(access(bdir, AEXIST) == 0 && readpaths(&r, bdir, "") == -1)
-			sysfatal("read base: %r");
-	}else{
-		for(i = 0; i < argc; i++){
-			tpath = smprint(TDIR"/%s", argv[i]);
-			rpath = smprint(RDIR"/%s", argv[i]);
-			bpath = smprint("%s/%s", bdir, argv[i]);
-			if((d = dirstat(tpath)) == nil && (d = dirstat(rpath)) == nil && (d = dirstat(bpath)) == nil)
-				goto nextarg;
-			if(d->mode & DMDIR){
-				readpaths(&r, TDIR, argv[i]);
-				readpaths(&r, RDIR, argv[i]);
-				readpaths(&r, bdir, argv[i]);
-			}else{
-				grow(&r);
-				r.path[r.npath++] = estrdup(argv[i]);
+
+	nidx = 0;
+	idxsz = 32;
+	idx = emalloc(idxsz*sizeof(Idxent));
+	nwdir = 0;
+	wdirsz = 32;
+	wdir = emalloc(wdirsz*sizeof(Idxent));
+	argrel = emalloc(argc*sizeof(char*));
+	argn = emalloc(argc*sizeof(int));
+	for(i = 0; i < argc; i++){
+		argrel[i] = reporel(argv[i]);
+		argn[i] = strlen(argrel[i]);
+	}
+	if((o = Bfdopen(1, OWRITE)) == nil)
+		sysfatal("open out: %r");
+	if(useidx){
+		if((f = Bopen(".git/INDEX9", OREAD)) == nil){
+			fprint(2, "open index: %r\n");
+			if(access(".git/index9", AEXIST) == 0){
+				fprint(2, "index format conversion needed:\n");
+				fprint(2, "\tcd %s && git/fs\n", repopath);
+				fprint(2, "\t@{cd .git/index9/removed >[2]/dev/null && walk -f | sed 's/^/R NOQID 0 /'} >> .git/INDEX9\n");
+				fprint(2, "\t@{cd .git/fs/HEAD/tree && walk -f | sed 's/^/T NOQID 0 /'} >> .git/INDEX9\n");
 			}
-nextarg:
-			free(bpath);
-			free(tpath);
-			free(rpath);
-			free(d);
+			exits("noindex");
 		}
+		line = 0;
+		while((ln = Brdstr(f, '\n', 1)) != nil){
+			line++;
+			/* allow blank lines */
+			if(ln[0] == 0 || ln[0] == '\n')
+				continue;
+			if(getfields(ln, parts, nelem(parts), 0, " \t") != nelem(parts))
+				sysfatal(".git/INDEX9:%d: corrupt index", line);
+			if(nidx == idxsz){
+				idxsz += idxsz/2;
+				idx = realloc(idx, idxsz*sizeof(Idxent));
+			}
+			cleanname(parts[3]);
+			idx[nidx].state = *parts[0];
+			idx[nidx].qid = parseqid(parts[1]);
+			idx[nidx].mode = strtol(parts[2], nil, 8);
+			idx[nidx].path = strdup(parts[3]);
+			idx[nidx].order = nidx;
+			nidx++;
+			free(ln);
+		}
+		qsort(idx, nidx, sizeof(Idxent), idxcmp);
 	}
-	dedup(&r);
 
-	for(i = 0; i < r.npath; i++){
-		p = r.path[i];
-		d = dirstat(p);
-		if(d && d->mode & DMDIR)
-			goto next;
-		rpath = smprint(RDIR"/%s", p);
-		tpath = smprint(TDIR"/%s", p);
-		bpath = smprint("%s/%s", bdir, p);
-		/* Fast path: we don't want to force access to the rpath. */
-		if(useindex && d && sameqid(d, tpath)) {
-			if(!quiet && (printflg & Tflg))
-				print("%s%s\n", tstr, p);
+	for(i = 0; i < argc; i++){
+		argrel[i] = reporel(argv[i]);
+		argn[i] = strlen(argrel[i]);
+	}
+	if(argc == 0)
+		loadwdir(".");
+	else for(i = 0; i < argc; i++)
+		loadwdir(argrel[i]);
+	qsort(wdir, nwdir, sizeof(Idxent), idxcmp);
+	for(i = 0; i < argc; i++){
+		argrel[i] = reporel(argv[i]);
+		argn[i] = strlen(argrel[i]);
+	}
+	i = 0;
+	j = 0;
+	while(i < nidx || j < nwdir){
+		/* find the last entry we tracked for a path */
+		while(i+1 < nidx && strcmp(idx[i].path, idx[i+1].path) == 0){
+			staleidx = 1;
+			i++;
+		}
+		while(j+1 < nwdir && strcmp(wdir[j].path, wdir[j+1].path) == 0)
+			j++;
+		if(i < nidx && !pfxmatch(idx[i].path, argrel, argn, argc)){
+			i++;
+			continue;
+		}
+		if(i >= nidx)
+			c = 1;
+		else if(j >= nwdir)
+			c = -1;
+		else
+			c = strcmp(idx[i].path, wdir[j].path);
+		/* exists in both index and on disk */
+		if(c == 0){
+			if(idx[i].state == 'R' && checkedin(&idx[i], 0))
+				show(o, Rflg, rstr, idx[i].path);
+			else if(idx[i].state == 'A' && !checkedin(&idx[i], 1))
+				show(o, Aflg, astr, idx[i].path);
+			else if(!samedata(&idx[i], &wdir[j]))
+				show(o, Mflg, mstr, idx[i].path);
+			else
+				show(o, Tflg, tstr, idx[i].path);
+			i++;
+			j++;
+		/* only exists in index */
+		}else if(c < 0){
+			if(checkedin(&idx[i], 0))
+				show(o, Rflg, rstr, idx[i].path);
+			i++;
+		/* only exists on disk */
 		}else{
-			if(d == nil || (useindex && access(rpath, AEXIST) == 0)){
-				if(access(bpath, AEXIST) == 0){
-					dirty |= Rflg;
-					if(!quiet && (printflg & Rflg))
-						print("%s%s\n", rstr, p);
-				}
-			}else if(access(bpath, AEXIST) == -1) {
-				dirty |= Aflg;
-				if(!quiet && (printflg & Aflg))
-					print("%s%s\n", astr, p);
-			}else if(samedata(p, bpath)){
-				if(!quiet && (printflg & Tflg))
-					print("%s%s\n", tstr, p);
-				if(useindex)
-					writeqid(d, tpath);
-			}else{
-				dirty |= Mflg;
-				if(!quiet && (printflg & Mflg))
-					print("%s%s\n", mstr, p);
-			}
+			if(!useidx && checkedin(&wdir[j], 0)){
+				if(samedata(nil, &wdir[j]))
+					show(o, Tflg, tstr, wdir[j].path);
+				else
+					show(o, Mflg, mstr, wdir[j].path);
+			}else if(printflg & Uflg)
+				show(o, Uflg, ustr, wdir[j].path);
+			j++;
 		}
-		free(rpath);
-		free(tpath);
-		free(bpath);
-next:
-		free(d);
 	}
+	Bterm(o);
+
+	if(useidx && staleidx)
+	if((wfd = create(".git/INDEX9.new", OWRITE, 0644)) != -1){
+		if((w = Bfdopen(wfd, OWRITE)) == nil){
+			close(wfd);
+			goto Nope;
+		}
+		for(i = 0; i < nidx; i++){
+			while(i+1 < nidx && strcmp(idx[i].path, idx[i+1].path) == 0)
+				i++;
+			Bprint(w, "%c %Q %o %s\n",
+				idx[i].state,
+				idx[i].qid, 
+				idx[i].mode,
+				idx[i].path);
+		}
+		Bterm(w);
+		nulldir(&rn);
+		rn.name = "INDEX9";
+		if(remove(".git/INDEX9") == -1)
+			goto Nope;
+		if(dirwstat(".git/INDEX9.new", &rn) == -1)
+			sysfatal("rename: %r");
+	}
+
+Nope:
 	if(!dirty)
 		exits(nil);
 
-	p = buf;
-	e = buf + sizeof(buf);
+	p = xbuf;
+	e = p + sizeof(xbuf);
 	for(i = 0; (1 << i) != Tflg; i++)
 		if(dirty & (1 << i))
-			p = seprint(p, e, "%c", "DMAT"[i]);
-	exits(buf);
+			p = seprint(p, e, "%c", "RMAUT"[i]);
+	exits(xbuf);
 }