ref: 23992f867fce5f0bfd2170137f31816326c7a31f
parent: 738c1ea8ac43ffcb5589ad1fe749b8f13fc4a652
author: Ori Bernstein <ori@eigenstate.org>
date: Fri Sep 25 19:26:01 EDT 2020
git/repack: better sort order, offset deltas. Improves repack and index speed.
--- a/pack.c
+++ b/pack.c
@@ -8,21 +8,26 @@
typedef struct Objmeta Objmeta;
typedef struct Compout Compout;
+#define max(x, y) ((x) > (y) ? (x) : (y))
+
struct Objmeta {
- /* For sorting */
- int type;
+ Object *obj;
char *path;
vlong mtime;
- Hash hash;
/* The best delta we picked */
- Object *obj;
+ Objmeta *head;
+ Objmeta *prev;
Object *base;
Delta *delta;
int ndelta;
int nchain;
+ /* Only used for delta window */
Dtab dtab;
+
+ /* Only used for writing offset deltas */
+ vlong off;
};
struct Compout {
@@ -273,18 +278,19 @@
return 0;
}
-int
+static vlong
readvint(char *p, char **pp)
{
- int i, n, c;
+ int s, c;
+ vlong n;
- i = 0;
+ s = 0;
n = 0;
do {
c = *p++;
- n |= (c & 0x7f) << i;
- i += 7;
- } while (c & 0x80);
+ n |= (c & 0x7f) << s;
+ s += 7;
+ } while (c & 0x80 && s < 63);
*pp = p;
return n;
@@ -390,21 +396,19 @@
Object b;
char *d;
vlong r;
- int c, n;
+ int c, s, n;
- r = 0;
d = nil;
- while(1){
+ r = 0;
+ s = 0;
+ do {
if((c = Bgetc(f)) == -1)
- goto error;
- r |= c & 0x7f;
- if (!(c & 0x80))
- break;
- r++;
- r <<= 7;
- }while(c & 0x80);
+ return -1;
+ r |= (c & 0x7f) << s;
+ s += 7;
+ } while (c & 0x80 && s < 63);
- if(r > p){
+ if(r > p || s > 63){
werrstr("junk offset -%lld (from %lld)", r, p);
goto error;
}
@@ -1122,43 +1126,54 @@
}
static int
-dsortcmp(void *pa, void *pb)
+deltaordercmp(void *pa, void *pb)
{
Objmeta *a, *b;
int cmp;
- a = pa;
- b = pb;
- if(a->type != b->type)
- return a->type - b->type;
+ a = *(Objmeta**)pa;
+ b = *(Objmeta**)pb;
+ if(a->obj->type != b->obj->type)
+ return a->obj->type - b->obj->type;
cmp = strcmp(a->path, b->path);
if(cmp != 0)
return cmp;
if(a->mtime != b->mtime)
return a->mtime - b->mtime;
- return memcmp(a->hash.h, b->hash.h, sizeof(a->hash.h));
+ return memcmp(a->obj->hash.h, b->obj->hash.h, sizeof(a->obj->hash.h));
}
static int
-timecmp(void *pa, void *pb)
+writeordercmp(void *pa, void *pb)
{
- Objmeta *a, *b;
+ Objmeta *a, *b, *ahd, *bhd;
- a = pa;
- b = pb;
- return b->mtime - a->mtime;
+ a = *(Objmeta**)pa;
+ b = *(Objmeta**)pb;
+ ahd = (a->head == nil) ? a : a->head;
+ bhd = (b->head == nil) ? b : b->head;
+ if(ahd->mtime != bhd->mtime)
+ return bhd->mtime - ahd->mtime;
+ if(ahd != bhd)
+ return (uintptr)bhd - (uintptr)ahd;
+ if(a->nchain != b->nchain)
+ return a->nchain - b->nchain;
+ return a->mtime - b->mtime;
}
static void
-addmeta(Objmeta **m, int *nm, int type, Hash h, char *path, vlong mtime)
+addmeta(Objmeta ***meta, int *nmeta, Object *o, char *path, vlong mtime)
{
- *m = erealloc(*m, (*nm + 1)*sizeof(Objmeta));
- memset(&(*m)[*nm], 0, sizeof(Objmeta));
- (*m)[*nm].type = type;
- (*m)[*nm].path = estrdup(path);
- (*m)[*nm].mtime = mtime;
- (*m)[*nm].hash = h;
- *nm += 1;
+ Objmeta *m;
+
+ m = emalloc(sizeof(Objmeta));
+ m->obj = o;
+ m->path = estrdup(path);
+ m->mtime = mtime;
+
+ *meta = erealloc(*meta, (*nmeta + 1)*sizeof(Objmeta));
+ (*meta)[*nmeta] = m;
+ *nmeta += 1;
}
static void
@@ -1166,10 +1181,11 @@
{
free(m->delta);
free(m->path);
+ free(m);
}
static int
-loadtree(Objmeta **m, int *nm, Hash tree, char *dpath, vlong mtime, Objset *has)
+loadtree(Objmeta ***m, int *nm, Objset *has, Hash tree, char *dpath, vlong mtime)
{
Object *t, *o;
Dirent *e;
@@ -1181,7 +1197,7 @@
if((t = readobject(tree)) == nil)
return -1;
osadd(has, t);
- addmeta(m, nm, t->type, t->hash, dpath, mtime);
+ addmeta(m, nm, t, dpath, mtime);
for(i = 0; i < t->tree->nent; i++){
e = &t->tree->ent[i];
if(oshas(has, e->h))
@@ -1193,8 +1209,8 @@
p = smprint("%s/%s", dpath, e->name);
if(k == GBlob){
osadd(has, o);
- addmeta(m, nm, k, o->hash, p, mtime);
- }else if(loadtree(m, nm, e->h, p, mtime, has) == -1){
+ addmeta(m, nm, o, p, mtime);
+ }else if(loadtree(m, nm, has, e->h, p, mtime) == -1){
free(p);
return -1;
}
@@ -1205,7 +1221,7 @@
}
static int
-loadcommit(Hash h, Objset *has, Objmeta **m, int *nm)
+loadcommit(Objmeta ***m, int *nm, Objset *has, Hash h)
{
Object *c;
int r;
@@ -1215,14 +1231,14 @@
if((c = readobject(h)) == nil)
return -1;
osadd(has, c);
- addmeta(m, nm, c->type, c->hash, "", c->commit->ctime);
- r = loadtree(m, nm, c->commit->tree, "", c->commit->ctime, has);
+ addmeta(m, nm, c, "", c->commit->ctime);
+ r = loadtree(m, nm, has, c->commit->tree, "", c->commit->ctime);
unref(c);
return r;
}
static int
-readmeta(Object **commits, int ncommits, Objmeta **m)
+readmeta(Object **commits, int ncommits, Objmeta ***m)
{
Objset has;
int i, nm;
@@ -1232,7 +1248,7 @@
osinit(&has);
for(i = 0; i < ncommits; i++){
dprint(2, "loading commit %H\n", commits[i]->hash);
- if(loadcommit(commits[i]->hash, &has, m, &nm) == -1){
+ if(loadcommit(m, &nm, &has, commits[i]->hash) == -1){
free(*m);
return -1;
}
@@ -1252,20 +1268,18 @@
}
static void
-pickdeltas(Objmeta *meta, int nmeta)
+pickdeltas(Objmeta **meta, int nmeta)
{
Objmeta *m, *p;
Object *a, *b;
Delta *d;
- int i, x, nd, sz, pcnt, best;
+ int i, j, x, nd, sz, pcnt, best;
- m = nil;
- p = nil;
pcnt = 0;
fprint(2, "deltifying %d objects: 0%%", nmeta);
- qsort(meta, nmeta, sizeof(Objmeta), dsortcmp);
+ qsort(meta, nmeta, sizeof(Objmeta*), deltaordercmp);
for(i = 0; i < nmeta; i++){
- m = &meta[i];
+ m = meta[i];
x = (i*100) / nmeta;
if(x > pcnt){
pcnt = x;
@@ -1272,24 +1286,22 @@
if(pcnt%10 == 0)
fprint(2, "\b\b\b\b%3d%%", pcnt);
}
- p = meta;
- if(i >= 10)
- p = m - 10;
- if(i >= 11)
- dtclear(&p[-1].dtab);
- if((a = readobject(m->hash)) == nil)
- sysfatal("missing object %H", m->hash);
+ if((a = readobject(m->obj->hash)) == nil)
+ sysfatal("readobject %H: %r", m->obj->hash);
best = a->size;
m->base = nil;
m->delta = nil;
m->ndelta = 0;
dtinit(&m->dtab, a->data, a->size);
- for(; p != m; p++){
+ if(i >= 11)
+ dtclear(&meta[i-11]->dtab);
+ for(j = max(0, i - 10); j < i; j++){
+ p = meta[j];
/* lets not make the chains too long */
if(p->nchain >= 4095)
continue;
- if((b = readobject(p->hash)) == nil)
- sysfatal("missing object %H", p->hash);
+ if((b = readobject(p->obj->hash)) == nil)
+ sysfatal("readobject %H: %r", p->obj->hash);
d = deltify(a->data, a->size, &p->dtab, &nd);
sz = deltasz(d, nd);
if(sz + 32 < best){
@@ -1303,6 +1315,10 @@
m->delta = d;
m->ndelta = nd;
m->nchain = p->nchain + 1;
+ m->prev = p;
+ m->head = p->head;
+ if(m->head == nil)
+ m->head = p;
}else
free(d);
@@ -1310,8 +1326,8 @@
}
unref(a);
}
- for(; p != m; p++)
- dtclear(&p->dtab);
+ for(i = max(0, nmeta - 10); i < nmeta; i++)
+ dtclear(&meta[i]->dtab);
fprint(2, "\b\b\b\b100%%\n");
}
@@ -1443,7 +1459,6 @@
hdr[0] |= len & 0xf;
len >>= 4;
for(i = 1; len != 0; i++){
- assert(i < sizeof(hdr));
hdr[i-1] |= 0x80;
hdr[i] = len & 0x7f;
len >>= 7;
@@ -1452,14 +1467,31 @@
}
static int
-genpack(int fd, Objmeta *meta, int nmeta, Hash *h)
+packoff(char *hdr, int off)
{
+ int i;
+
+ hdr[0] = off & 0x7f;
+ off >>= 7;
+ for(i = 1; off != 0; i++){
+ assert(i < sizeof(hdr));
+ hdr[i-1] |= 0x80;
+ hdr[i] = off & 0x7f;
+ off >>= 7;
+ }
+ return i;
+
+}
+
+static int
+genpack(int fd, Objmeta **meta, int nmeta, Hash *h, int odelta)
+{
int i, nh, nd, x, res, pcnt, ret;
DigestState *st;
Biobuf *bfd;
Objmeta *m;
Object *o;
- char *p, buf[16];
+ char *p, buf[32];
st = nil;
ret = -1;
@@ -1476,10 +1508,10 @@
PUTBE32(buf, nmeta);
if(hwrite(bfd, buf, 4, &st) == -1)
return -1;
- qsort(meta, nmeta, sizeof(Objmeta), timecmp);
+ qsort(meta, nmeta, sizeof(Objmeta*), writeordercmp);
fprint(2, "writing %d objects: 0%%", nmeta);
for(i = 0; i < nmeta; i++){
- m = &meta[i];
+ m = meta[i];
x = (i*100) / nmeta;
if(x > pcnt){
pcnt = x;
@@ -1486,7 +1518,7 @@
if(pcnt%10 == 0)
fprint(2, "\b\b\b\b%3d%%", pcnt);
}
- if((o = readobject(m->hash)) == nil)
+ if((o = readobject(m->obj->hash)) == nil)
return -1;
if(m->delta == nil){
nh = packhdr(buf, o->type, o->size);
@@ -1494,10 +1526,26 @@
if(hcompress(bfd, o->data, o->size, &st) == -1)
goto error;
}else{
+ m->off = Boffset(bfd);
nd = encodedelta(m, o, m->base, &p);
- nh = packhdr(buf, GRdelta, nd);
- hwrite(bfd, buf, nh, &st);
- hwrite(bfd, m->base->hash.h, sizeof(m->base->hash.h), &st);
+ if(odelta && m->prev->off != 0){
+ nh = 0;
+ nh += packhdr(buf, GOdelta, nd);
+ nh += packoff(buf+nh, m->off - m->prev->off);
+#ifdef NOPE
+ print("Odelta %d %lld\n\t", nd, m->off - m->prev->off);
+ for(int j = 0; j < nh; j++)
+ print("%02x:", buf[j] & 0xff);
+ print("\n");
+ if(m->off == 444541)
+ abort();
+#endif
+ hwrite(bfd, buf, nh, &st);
+ }else{
+ nh = packhdr(buf, GRdelta, nd);
+ hwrite(bfd, buf, nh, &st);
+ hwrite(bfd, m->base->hash.h, sizeof(m->base->hash.h), &st);
+ }
res = hcompress(bfd, p, nd, &st);
free(p);
if(res == -1)
@@ -1519,7 +1567,7 @@
int
writepack(int fd, Object **obj, int nobj, Hash *h)
{
- Objmeta *meta;
+ Objmeta **meta;
int i, r, nmeta;
dprint(1, "reading meta\n");
@@ -1528,9 +1576,9 @@
dprint(1, "picking deltas\n");
pickdeltas(meta, nmeta);
dprint(1, "generating pack\n");
- r = genpack(fd, meta, nmeta, h);
+ r = genpack(fd, meta, nmeta, h, 1);
for(i = 0; i < nmeta; i++)
- freemeta(&meta[i]);
+ freemeta(meta[i]);
free(meta);
return r;
}