shithub: git9

Download patch

ref: f514fa1fa6b51ce20e33f3d3599fe728d223264e
parent: 3c7125f599580538d08a1380b80ebd1f2323a171
author: Ori Bernstein <ori@eigenstate.org>
date: Sun Dec 13 14:52:57 EST 2020

all: speedups

This change speeds up git9 through reducing the number of
index opens and reads, instead caching the index in memory.

Additionally, a few arrays are made to grow exponentially,
instead of by single elements.

This nets a 30% speedup in many cases.

--- a/git.h
+++ b/git.h
@@ -21,6 +21,7 @@
 
 enum {
 	Pathmax		= 512,
+	Npackcache	= 32,
 	Hashsz		= 20,
 	Pktmax		= 65536,
 };
@@ -269,7 +270,9 @@
 void	olsfree(Objlist *);
 
 /* util functions */
-void	dprint(int, char *, ...);
+#define dprint(lvl, ...) \
+	if(chattygit >= lvl) _dprint(__VA_ARGS__)
+void	_dprint(char *, ...);
 void	*eamalloc(ulong, ulong);
 void	*emalloc(ulong);
 void	*earealloc(void *, ulong, ulong);
--- a/pack.c
+++ b/pack.c
@@ -5,20 +5,27 @@
 #include "git.h"
 
 typedef struct Buf	Buf;
-typedef struct Objmeta	Objmeta;
+typedef struct Metavec	Metavec;
+typedef struct Meta	Meta;
 typedef struct Compout	Compout;
 typedef struct Packf	Packf;
 
 #define max(x, y) ((x) > (y) ? (x) : (y))
 
-struct Objmeta {
+struct Metavec {
+	Meta	**meta;
+	int	nmeta;
+	int	metasz;
+};
+
+struct Meta {
 	Object	*obj;
 	char	*path;
 	vlong	mtime;
 
 	/* The best delta we picked */
-	Objmeta	*head;
-	Objmeta *prev;
+	Meta	*head;
+	Meta *prev;
 	Object	*base;
 	Delta	*delta;
 	int	ndelta;
@@ -46,8 +53,9 @@
 struct Packf {
 	char	*path;
 	int	refs;
-	int	idxfd;
-	int	packfd;
+	int	pack;
+	char	*idx;
+	vlong	nidx;
 };
 
 static int	readpacked(Biobuf *, Object *, int);
@@ -57,7 +65,9 @@
 Object *lruhead;
 Object *lrutail;
 int	ncache;
-int	cachemax = 1024;
+int	cachemiss;
+int	cacherefresh;
+int	cachemax = 4096;
 Packf	*packf;
 int	npackf;
 
@@ -161,11 +171,44 @@
 }
 
 static Packf*
+loadpackf(Packf *pf, char *name, int nname, char *pack, char *idx)
+{
+	int ifd, pfd;
+	Dir *d;
+
+	memset(pf, 0, sizeof(Packf));
+	if((pf->path = smprint("%.*s", nname, name)) == nil)
+		return nil;
+	pfd = open(pack, OREAD);
+	ifd = open(idx, OREAD);
+	if(pfd == -1 || ifd == -1)
+		goto errorf;
+	pf->pack = pfd;
+	if((d = dirfstat(ifd)) == nil)
+		goto errorf;
+	pf->nidx = d->length;
+	pf->idx = emalloc(pf->nidx);
+	if(readn(ifd, pf->idx, pf->nidx) != pf->nidx)
+		goto errori;
+	free(d);
+	return pf;
+
+errori:
+	free(d);
+	free(pf->idx);
+errorf:
+	if(pfd != -1) close(pfd);
+	if(ifd != -1) close(ifd);
+	return nil;		
+}
+
+static Packf*
 findpackf(char *path)
 {
-	int np, pfd, ifd;
-	char buf[192];
+	static int warned;
+	char pack[128], idx[128];
 	Packf *pf;
+	int i, np;
 
 	np = strlen(path);
 	if(np > 4 && strcmp(path + np - 4, ".idx") == 0)
@@ -176,28 +219,32 @@
 		sysfatal("invalid pack path %s", path);
 		return nil;
 	}
-		
-	for(pf = &packf[0]; pf != &packf[npackf]; pf++)
-		if(strncmp(pf->path, path, np) == 0 && strlen(pf->path) == np)
+
+	i = 0;
+	while(i < npackf){
+		pf = &packf[i];
+		if(strncmp(pf->path, path, np) == 0 && strlen(pf->path) == np){
+			pf->refs++;
 			return pf;
-	snprint(buf, sizeof(buf), ".git/objects/pack/%.*s.idx", np, path);
-	ifd = open(buf, OREAD);
-	snprint(buf, sizeof(buf), ".git/objects/pack/%.*s.pack", np, path);
-	pfd = open(buf, OREAD);
-	if(ifd == -1 || pfd == -1)
-		goto error;
+		}
+		if(pf->refs != 0 || npackf < Npackcache){
+			i++;
+			continue;
+		}
+		if(!warned){
+			fprint(2, "warning: repacking may be a good idea\n");
+			warned++;
+		}
+		free(pf->path);
+		free(pf->idx);
+		close(pf->pack);
+		memmove(&pf[i], &pf[i+1], npackf-(i+1));
+		npackf--;
+	}
 	packf = earealloc(packf, ++npackf, sizeof(Packf));
-	pf = &packf[npackf-1];
-	pf->path = smprint("%.*s", np, path);
-	pf->idxfd = ifd;
-	pf->packfd = pfd;
-	return pf;
-
-error:
-	print("pack open error for %s: %r\n", path);
-	if(ifd != -1) close(ifd);
-	if(pfd != -1) close(pfd);
-	return nil;
+	snprint(pack, sizeof(pack), ".git/objects/pack/%.*s.pack", np, path);
+	snprint(idx, sizeof(idx), ".git/objects/pack/%.*s.idx", np, path);
+	return loadpackf(&packf[npackf-1], path, np, pack, idx);
 }
 
 static Biobuf*
@@ -209,32 +256,46 @@
 	if((pf = findpackf(path)) == nil)
 		return nil;
 	bfd = emalloc(sizeof(Biobuf));
-	Binit(bfd, pf->packfd, OREAD);
+	Binit(bfd, pf->pack, OREAD);
 	return bfd;
 
 	
 }
 
-static Biobuf*
-idxopen(char *path)
+static char*
+idxopen(char *path, int *len)
 {
-	Biobuf *bfd;
 	Packf *pf;
 
 	if((pf = findpackf(path)) == nil)
 		return nil;
-	bfd = emalloc(sizeof(Biobuf));
-	Binit(bfd, pf->idxfd, OREAD);
-	return bfd;
+	*len = pf->nidx;
+	return pf->idx;
 }
 
 static void
-pfclose(Biobuf *f)
+packclose(Biobuf *f)
 {
+	int fd, i;
+
+	fd = Bfildes(f);
+	for(i = 0; i < npackf; i++)
+		if(packf[i].pack == fd)
+			packf[i].refs--;
 	Bterm(f);
 	free(f);
 }
 
+static void
+idxclose(char *idx)
+{
+	int i;
+
+	for(i = 0; i < npackf; i++)
+		if(packf[i].idx == idx)
+			packf[i].refs--;
+}
+
 static u32int
 crc32(u32int crc, char *b, int nb)
 {
@@ -337,32 +398,6 @@
 	return d.len;
 }
 
-static int
-preadbe32(Biobuf *b, int *v, vlong off)
-{
-	char buf[4];
-	
-	if(Bseek(b, off, 0) == -1)
-		return -1;
-	if(Bread(b, buf, sizeof(buf)) == -1)
-		return -1;
-	*v = GETBE32(buf);
-
-	return 0;
-}
-static int
-preadbe64(Biobuf *b, vlong *v, vlong off)
-{
-	char buf[8];
-	
-	if(Bseek(b, off, 0) == -1)
-		return -1;
-	if(Bread(b, buf, sizeof(buf)) == -1)
-		return -1;
-	*v = GETBE64(buf);
-	return 0;
-}
-
 static vlong
 readvint(char *p, char **pp)
 {
@@ -640,13 +675,14 @@
 }
 
 vlong
-searchindex(Biobuf *f, Hash h)
+searchindex(char *idx, int nidx, Hash h)
 {
-	int lo, hi, idx, i, nent;
+	int lo, hi, hidx, i, nent;
 	vlong o, oo;
-	Hash hh;
 
 	o = 8;
+	if(nidx < 8 + 256*4)
+		return -1;
 	/*
 	 * Read the fanout table. The fanout table
 	 * contains 256 entries, corresponsding to
@@ -658,19 +694,15 @@
 	 */
 	if (h.h[0] == 0){
 		lo = 0;
-		if(preadbe32(f, &hi, o) == -1)
-			goto err;
+		hi = GETBE32(idx + o);
 	} else {
 		o += h.h[0]*4 - 4;
-		if(preadbe32(f, &lo, o + 0) == -1)
-			goto err;
-		if(preadbe32(f, &hi, o + 4) == -1)
-			goto err;
+		lo = GETBE32(idx + o);
+		hi = GETBE32(idx + o + 4);
 	}
 	if(hi == lo)
 		goto notfound;
-	if(preadbe32(f, &nent, 8 + 255*4) == -1)
-		goto err;
+	nent=GETBE32(idx + 8 + 255*4);
 
 	/*
 	 * Now that we know the range of hashes that the
@@ -677,18 +709,20 @@
 	 * entry may exist in, read them in so we can do
 	 * a bsearch.
 	 */
-	idx = -1;
-	Bseek(f, Hashsz*lo + 8 + 256*4, 0);
+	hidx = -1;
+	o = Hashsz*lo + 8 + 256*4;
 	for(i = 0; i < hi - lo; i++){
-		if(Bread(f, hh.h, sizeof(hh.h)) == -1)
+		if(o < 0 || o + sizeof(h.h) > nidx)
 			goto err;
-		if(hasheq(&hh, &h))
-			idx = lo + i;
+		if(memcmp(h.h, idx+o, sizeof(h.h)) == 0){
+			hidx = lo + i;
+			break;
+		}
+		o += sizeof(h.h);
 	}
-	if(idx == -1)
+	if(hidx == -1)
 		goto notfound;
 
-
 	/*
 	 * We found the entry. If it's 32 bits, then we
 	 * can just return the oset, otherwise the 32
@@ -699,22 +733,25 @@
 	oo += 256*4;		/* Fanout table */
 	oo += Hashsz*nent;	/* Hashes */
 	oo += 4*nent;		/* Checksums */
-	oo += 4*idx;		/* Offset offset */
-	if(preadbe32(f, &i, oo) == -1)
+	oo += 4*hidx;		/* Offset offset */
+	if(oo < 0 || oo + 4 > nidx)
 		goto err;
-	o = i & 0xffffffff;
+	i = GETBE32(idx + oo);
+	o = i & 0xffffffffULL;
 	if(o & (1ull << 31)){
 		o &= 0x7fffffff;
-		if(preadbe64(f, &o, o) == -1)
+		if(o < 0 || o + 8 >= nidx)
 			goto err;
+		o = GETBE64(idx + o);
 	}
 	return o;
 
 err:
-	fprint(2, "unable to read packfile: %r\n");
+	werrstr("out of bounds read");
 	return -1;
 notfound:
 	werrstr("not present: %H", h);
+	sysfatal("notfound\n");
 	return -1;		
 }
 
@@ -850,17 +887,25 @@
 parsetree(Object *o)
 {
 	char *p, buf[256];
-	int np, nn, m;
-	Dirent *t;
+	int np, nn, m, entsz, nent;
+	Dirent *t, *ent;
 
 	p = o->data;
 	np = o->size;
+
+	nent = 0;
+	entsz = 16;
+	ent = eamalloc(entsz, sizeof(Dirent));	
 	o->tree = emalloc(sizeof(Tinfo));
 	while(np > 0){
 		if(scanword(&p, &np, buf, sizeof(buf)) == -1)
 			break;
-		o->tree->ent = erealloc(o->tree->ent, ++o->tree->nent * sizeof(Dirent));
-		t = &o->tree->ent[o->tree->nent - 1];
+		if(nent == entsz){
+			entsz *= 2;
+			ent = earealloc(ent, entsz, sizeof(Dirent));	
+		}
+		t = &ent[nent++];
+
 		memset(t, 0, sizeof(Dirent));
 		m = strtol(buf, nil, 8);
 		/* FIXME: symlinks and other BS */
@@ -884,6 +929,8 @@
 		p += sizeof(t->h.h);
 		np -= sizeof(t->h.h);
 	}
+	o->tree->ent = ent;
+	o->tree->nent = nent;
 }
 
 static void
@@ -908,11 +955,10 @@
 static Object*
 readidxobject(Biobuf *idx, Hash h, int flag)
 {
-	char path[Pathmax];
-	char hbuf[41];
+	char *idxbuf, path[Pathmax], hbuf[41];
 	Biobuf *f;
 	Object *obj, *new;
-	int l, i, n;
+	int l, i, n, nidx;
 	vlong o;
 	Dir *d;
 
@@ -921,8 +967,8 @@
 			/*
 			 * If we're indexing, we need to be careful
 			 * to only return objects within this pack,
-			 * so we don't load objects from outside the
-			 * current pack.
+			 * so if the objects exist outside the pack,
+			 * we don't index the wrong copy.
 			 */
 			if(!(obj->flag & Cidx))
 				return nil;
@@ -940,7 +986,9 @@
 		}
 		if(obj->flag & Cloaded)
 			return obj;
+		cacherefresh++;
 	}
+	cachemiss++;
 	if(flag & Cthin)
 		flag &= ~Cidx;
 	if(flag & Cidx)
@@ -972,10 +1020,10 @@
 		l = strlen(d[i].name);
 		if(l > 4 && strcmp(d[i].name + l - 4, ".idx") != 0)
 			continue;
-		if((f = idxopen(d[i].name)) == nil)
+		if((idxbuf = idxopen(d[i].name, &nidx)) == nil)
 			continue;
-		o = searchindex(f, h);
-		pfclose(f);
+		o = searchindex(idxbuf, nidx, h);
+		idxclose(idxbuf);
 		if(o != -1)
 			break;
 	}
@@ -988,7 +1036,7 @@
 		goto errorf;
 	if(readpacked(f, obj, flag) == -1)
 		goto errorf;
-	pfclose(f);
+	packclose(f);
 	parseobject(obj);
 	free(d);
 	cache(obj);
@@ -1208,11 +1256,11 @@
 static int
 deltaordercmp(void *pa, void *pb)
 {
-	Objmeta *a, *b;
+	Meta *a, *b;
 	int cmp;
 
-	a = *(Objmeta**)pa;
-	b = *(Objmeta**)pb;
+	a = *(Meta**)pa;
+	b = *(Meta**)pb;
 	if(a->obj->type != b->obj->type)
 		return a->obj->type - b->obj->type;
 	cmp = strcmp(a->path, b->path);
@@ -1226,10 +1274,10 @@
 static int
 writeordercmp(void *pa, void *pb)
 {
-	Objmeta *a, *b, *ahd, *bhd;
+	Meta *a, *b, *ahd, *bhd;
 
-	a = *(Objmeta**)pa;
-	b = *(Objmeta**)pb;
+	a = *(Meta**)pa;
+	b = *(Meta**)pb;
 	ahd = (a->head == nil) ? a : a->head;
 	bhd = (b->head == nil) ? b : b->head;
 	if(ahd->mtime != bhd->mtime)
@@ -1242,27 +1290,29 @@
 }
 
 static void
-addmeta(Objmeta ***meta, int *nmeta, Objset *has, Object *o, char *path, vlong mtime)
+addmeta(Metavec *v, Objset *has, Object *o, char *path, vlong mtime)
 {
-	Objmeta *m;
+	Meta *m;
 
 	if(oshas(has, o->hash))
 		return;
 	osadd(has, o);
-	if(meta == nil)
+	if(v == nil)
 		return;
-	m = emalloc(sizeof(Objmeta));
+	m = emalloc(sizeof(Meta));
 	m->obj = o;
 	m->path = estrdup(path);
 	m->mtime = mtime;
 
-	*meta = erealloc(*meta, (*nmeta + 1)*sizeof(Objmeta));
-	(*meta)[*nmeta] = m;
-	*nmeta += 1;
+	if(v->nmeta == v->metasz){
+		v->metasz = 2*v->metasz;
+		v->meta = earealloc(v->meta, v->metasz, sizeof(Meta*));
+	}
+	v->meta[v->nmeta++] = m;
 }
 
 static void
-freemeta(Objmeta *m)
+freemeta(Meta *m)
 {
 	free(m->delta);
 	free(m->path);
@@ -1270,7 +1320,7 @@
 }
 
 static int
-loadtree(Objmeta ***m, int *nm, Objset *has, Hash tree, char *dpath, vlong mtime)
+loadtree(Metavec *v, Objset *has, Hash tree, char *dpath, vlong mtime)
 {
 	Object *t, *o;
 	Dirent *e;
@@ -1281,7 +1331,7 @@
 		return 0;
 	if((t = readobject(tree)) == nil)
 		return -1;
-	addmeta(m, nm, has, t, dpath, mtime);
+	addmeta(v, has, t, dpath, mtime);
 	for(i = 0; i < t->tree->nent; i++){
 		e = &t->tree->ent[i];
 		if(oshas(has, e->h))
@@ -1292,8 +1342,8 @@
 		o = clearedobject(e->h, k);
 		p = smprint("%s/%s", dpath, e->name);
 		if(k == GBlob)
-			addmeta(m, nm, has, o, p, mtime);
-		else if(loadtree(m, nm, has, e->h, p, mtime) == -1){
+			addmeta(v, has, o, p, mtime);
+		else if(loadtree(v, has, e->h, p, mtime) == -1){
 			free(p);
 			return -1;
 		}
@@ -1304,7 +1354,7 @@
 }
 
 static int
-loadcommit(Objmeta ***m, int *nm, Objset *has, Hash h)
+loadcommit(Metavec *v, Objset *has, Hash h)
 {
 	Object *c;
 	int r;
@@ -1313,35 +1363,41 @@
 		return 0;
 	if((c = readobject(h)) == nil)
 		return -1;
-	addmeta(m, nm, has, c, "", c->commit->ctime);
-	r = loadtree(m, nm, has, c->commit->tree, "", c->commit->ctime);
+	addmeta(v, has, c, "", c->commit->ctime);
+	r = loadtree(v, has, c->commit->tree, "", c->commit->ctime);
 	unref(c);
 	return r;
 }
 
 static int
-readmeta(Hash *theirs, int ntheirs, Hash *ours, int nours, Objmeta ***m)
+readmeta(Hash *theirs, int ntheirs, Hash *ours, int nours, Meta ***m)
 {
 	Object **obj;
 	Objset has;
-	int i, nm, nobj;
+	int i, nobj;
+	Metavec v;
 
 	*m = nil;
-	nm = 0;
 	osinit(&has);
+	v.nmeta = 0;
+	v.metasz = 64;
+	v.meta = eamalloc(v.metasz, sizeof(Meta*));
 	if(findtwixt(theirs, ntheirs, ours, nours, &obj, &nobj) == -1)
 		sysfatal("load twixt: %r");
+
 	if(nobj == 0)
 		return 0;
 	for(i = 0; i < nours; i++)
 		if(!hasheq(&ours[i], &Zhash))
-			if(loadcommit(nil, nil, &has, ours[i]) == -1)
+			if(loadcommit(nil, &has, ours[i]) == -1)
 				goto out;
+	
 	for(i = 0; i < nobj; i++)
-		if(loadcommit(m, &nm, &has, obj[i]->hash) == -1)
+		if(loadcommit(&v, &has, obj[i]->hash) == -1)
 			goto out;
 	osclear(&has);
-	return nm;
+	*m = v.meta;
+	return v.nmeta;
 out:
 	osclear(&has);
 	free(*m);
@@ -1359,9 +1415,9 @@
 }
 
 static void
-pickdeltas(Objmeta **meta, int nmeta)
+pickdeltas(Meta **meta, int nmeta)
 {
-	Objmeta *m, *p;
+	Meta *m, *p;
 	Object *a, *b;
 	Delta *d;
 	int i, j, nd, sz, pct, best;
@@ -1369,7 +1425,7 @@
 	pct = 0;
 	dprint(1, "picking deltas\n");
 	fprint(2, "deltifying %d objects:   0%%", nmeta);
-	qsort(meta, nmeta, sizeof(Objmeta*), deltaordercmp);
+	qsort(meta, nmeta, sizeof(Meta*), deltaordercmp);
 	for(i = 0; i < nmeta; i++){
 		m = meta[i];
 		pct = showprogress((i*100) / nmeta, pct);
@@ -1471,7 +1527,7 @@
 }
 
 static int
-encodedelta(Objmeta *m, Object *o, Object *b, void **pp)
+encodedelta(Meta *m, Object *o, Object *b, void **pp)
 {
 	char *p, *bp, buf[16];
 	int len, sz, n, i, j;
@@ -1572,12 +1628,12 @@
 }
 
 static int
-genpack(int fd, Objmeta **meta, int nmeta, Hash *h, int odelta)
+genpack(int fd, Meta **meta, int nmeta, Hash *h, int odelta)
 {
 	int i, nh, nd, res, pct, ret;
 	DigestState *st;
 	Biobuf *bfd;
-	Objmeta *m;
+	Meta *m;
 	Object *o;
 	char *p, buf[32];
 
@@ -1597,10 +1653,11 @@
 	PUTBE32(buf, nmeta);
 	if(hwrite(bfd, buf, 4, &st) == -1)
 		return -1;
-	qsort(meta, nmeta, sizeof(Objmeta*), writeordercmp);
+	qsort(meta, nmeta, sizeof(Meta*), writeordercmp);
 	if(interactive)
 		fprint(2, "writing %d objects:   0%%", nmeta);
 	for(i = 0; i < nmeta; i++){
+
 		m = meta[i];
 		pct = showprogress((i*100)/nmeta, pct);
 		if((o = readobject(m->obj->hash)) == nil)
@@ -1630,7 +1687,8 @@
 		}
 		unref(o);
 	}
-	fprint(2, "\b\b\b\b100%%\n");
+	if(interactive)
+		fprint(2, "\b\b\b\b100%%\n");
 	sha1(nil, 0, h->h, st);
 	if(Bwrite(bfd, h->h, sizeof(h->h)) == -1)
 		goto error;
@@ -1644,7 +1702,7 @@
 int
 writepack(int fd, Hash *theirs, int ntheirs, Hash *ours, int nours, Hash *h)
 {
-	Objmeta **meta;
+	Meta **meta;
 	int i, r, nmeta;
 
 	if((nmeta = readmeta(theirs, ntheirs, ours, nours, &meta)) == -1)
--- a/util.c
+++ b/util.c
@@ -277,12 +277,10 @@
 }
 
 void
-dprint(int v, char *fmt, ...)
+_dprint(char *fmt, ...)
 {
 	va_list ap;
 
-	if(chattygit < v)
-		return;
 	va_start(ap, fmt);
 	vfprint(2, fmt, ap);
 	va_end(ap);