ref: 1e8a5c690d21a1b6fc3ff78b116541bb139c761f
parent: 6586b5031b32095f14dbd870a3f69bdf904ea2dd
author: Ori Bernstein <ori@eigenstate.org>
date: Thu Sep 17 15:21:19 EDT 2020
git/repack: maintain window in redeltification For every deltification, we were creating a delta block list. We were then deltifying against every block ten times. This meant that we were creating ten times more block lists than we needed. Fixing this cuts our time in half for repacking the myrddin mc repo.
--- a/delta.c
+++ b/delta.c
@@ -3,10 +3,6 @@
#include "git.h"
-typedef struct Dblock Dblock;
-typedef struct Delta Delta;
-typedef struct Dtab Dtab;
-
enum {
K = 3,
Bconst = 42,
@@ -16,19 +12,6 @@
Hlast = 1692137473L,
};
-struct Dblock {
- uchar *buf;
- int len;
- int off;
- u64int rhash;
-};
-
-struct Dtab {
- Dblock *b;
- int nb;
- int sz;
-};
-
static void
addblk(Dtab *dt, void *buf, int len, int off, u64int rh)
{
@@ -117,44 +100,58 @@
return len;
}
-
-Delta*
-deltify(void *targ, int ntarg, void *base, int nbase, int *pnd)
+/*
+ * Build the block table for a delta base: split base[0..nbase) into
+ * blocks with nextblk and record each one in dt with addblk.
+ * The base buffer and its length are also stored in dt, because
+ * deltify bounds its block stretching with dt->base + dt->nbase.
+ * dt->b is allocated here; release it with dtclear.
+ */
+void
+dtinit(Dtab *dt, void *base, int nbase)
{
- Dblock *k;
- Delta *d;
- Dtab dt;
- uchar *l, *s, *e, *eb, *bp, *tp;
- int i, nd, nb;
+ uchar *bp, *s, *e;
u64int rh;
-
+
bp = base;
- tp = targ;
s = bp;
e = bp;
- dt.nb = 0;
- dt.sz = 128;
- dt.b = emalloc(dt.sz*sizeof(Dblock));
+ rh = 0;
+ dt->nb = 0;
+ dt->sz = 128;
+ dt->b = emalloc(dt->sz*sizeof(Dblock));
+ /* deltify reads these to find the end of the base buffer */
+ dt->base = bp;
+ dt->nbase = nbase;
while(e != bp + nbase){
e += nextblk(s, bp + nbase, &rh);
- addblk(&dt, s, e - s, s - bp, rh);
+ addblk(dt, s, e - s, s - bp, rh);
s = e;
}
+}
+/* Free the block list that dtinit allocated in dt->b. */
+void
+dtclear(Dtab *dt)
+{
+ free(dt->b);
+}
+
+Delta*
+deltify(void *targ, int ntarg, Dtab *dt, int *pnd)
+{
+ Dblock *k;
+ Delta *d;
+ uchar *l, *s, *e, *eb, *tp;
+ int i, nd, nb;
+ u64int rh;
+
+
+ tp = targ;
l = targ;
s = targ;
e = targ;
d = nil;
nd = 0;
+ rh = 0;
e += nextblk(s, tp + ntarg, &rh);
while(1){
- if((rh & Bmask) == Bconst && (k = findrough(&dt, rh)) != nil){
+ if((rh & Bmask) == Bconst && (k = findrough(dt, rh)) != nil){
if(sameblk(k, s, e)){
nb = k->len;
eb = k->buf + k->len;
/* stretch the block: 1<<24 is the max packfiles support. */
for(i = 0; i < (1<<24) - nb; i++){
- if(e == tp + ntarg || eb == bp + nbase)
+ if(e == tp + ntarg || eb == dt->base + dt->nbase)
break;
if(*e != *eb)
break;
@@ -179,6 +176,5 @@
}
emitdelta(&d, &nd, 0, l - tp, tp + ntarg - l);
*pnd = nd;
- free(dt.b);
return d;
}
--- a/git.h
+++ b/git.h
@@ -16,6 +16,8 @@
typedef struct Dirent Dirent;
typedef struct Idxent Idxent;
typedef struct Objlist Objlist;
+typedef struct Dtab Dtab;
+typedef struct Dblock Dblock;
enum {
/* 5k objects should be enough */
@@ -95,12 +97,6 @@
char islink;
};
-struct Delta {
- int cpy;
- int off;
- int len;
-};
-
struct Object {
/* Git data */
Hash hash;
@@ -156,6 +152,28 @@
int sz;
};
+struct Dtab {
+ uchar *base;
+ int nbase;
+ Dblock *b;
+ int nb;
+ int sz;
+};
+
+struct Dblock {
+ uchar *buf;
+ int len;
+ int off;
+ u64int rhash;
+};
+
+struct Delta {
+ int cpy;
+ int off;
+ int len;
+};
+
+
#define GETBE16(b)\
((((b)[0] & 0xFFul) << 8) | \
(((b)[1] & 0xFFul) << 0))
@@ -263,7 +281,9 @@
char *strip(char *);
/* packing */
-Delta* deltify(void*, int, void *, int, int *);
+void dtinit(Dtab *, void *, int);
+void dtclear(Dtab*);
+Delta* deltify(void*, int, Dtab*, int*);
/* proto handling */
int readpkt(Conn*, char*, int);
--- a/pack.c
+++ b/pack.c
@@ -13,11 +13,13 @@
char *path;
vlong mtime;
Hash hash;
+ Dtab tab;
Object *obj;
Object *base;
Delta *delta;
int ndelta;
+ Dtab dtab;
};
struct Compout {
@@ -1266,8 +1268,10 @@
fprint(2, "\b\b\b\b%3d%%", pcnt);
}
p = meta;
- if(i > 10)
+ if(i >= 10)
p = m - 10;
+ if(i >= 11)
+ dtclear(&p[-1].dtab);
if((a = readobject(m->hash)) == nil)
sysfatal("missing object %H", m->hash);
best = a->size;
@@ -1274,10 +1278,11 @@
m->base = nil;
m->delta = nil;
m->ndelta = 0;
+ dtinit(&m->dtab, a->data, a->size);
for(; p != m; p++){
if((b = readobject(p->hash)) == nil)
sysfatal("missing object %H", p->hash);
- d = deltify(a->data, a->size, b->data, b->size, &nd);
+ d = deltify(a->data, a->size, &p->dtab, &nd);
sz = deltasz(d, nd);
if(sz + 32 < best){
free(m->delta);
@@ -1291,6 +1296,8 @@
}
unref(a);
}
+ for(; p != m; p++)
+ dtclear(&p->dtab);
fprint(2, "\b\b\b\b100%%\n");
}