shithub: gefs

ref: f0de08f79d1d6931c3829f4ad33818de5479b283
dir: /fs.c/

View raw version
#include <u.h>
#include <libc.h>
#include <auth.h>
#include <fcall.h>
#include <avl.h>

#include "dat.h"
#include "fns.h"
#include "atomic.h"

/*
 * Looks up the directory entry called name inside the
 * directory whose qid path is up, in tree t.  On success
 * the entry's qid and length are stored through qid and
 * len and 0 is returned.  Returns -1 when the key cannot
 * be packed, the lookup reports an error, or the value
 * cannot be unpacked into an Xdir.
 */
int
walk1(Tree *t, vlong up, char *name, Qid *qid, vlong *len)
{
	char *p, kbuf[Keymax], rbuf[Kvmax];
	Xdir d;
	Kvp kv;
	Key k;

	if((p = packdkey(kbuf, sizeof(kbuf), up, name)) == nil)
		return -1;
	k.k = kbuf;
	k.nk = p - kbuf;
	/* btlookup hands back an error string on failure, nil on success */
	if(btlookup(t, &k, &kv, rbuf, sizeof(rbuf)) != nil)
		return -1;
	if(kv2dir(&kv, &d) == -1)
		return -1;
	*qid = d.qid;
	*len = d.length;
	return 0;
}

/*
 * Handles a snapshot admin message.  When a->delete is set
 * the snapshot a->old is deleted (refused while it is
 * mounted); otherwise the tree named a->old is tagged as
 * a->new, mutable if a->mutable is set.  Results and errors
 * are printed to a->fd.  The whole operation runs with
 * fs->mountlk held.
 */
static void
snapfs(Amsg *a)
{
	Tree *t, *s;
	Mount *mnt;
	char *e;

	lock(&fs->mountlk);
	t = nil;
	for(mnt = fs->mounts; mnt != nil; mnt = mnt->next){
		if(strcmp(a->old, mnt->name) == 0){
			updatesnap(&mnt->root, mnt->root, mnt->name);
			t = agetp(&mnt->root);
			ainc(&t->memref);
			break;
		}
	}
	if(t == nil && (t = opensnap(a->old, nil)) == nil){
		fprint(a->fd, "snap: open '%s': does not exist\n", a->old);
		unlock(&fs->mountlk);
		return;
	}
	if(a->delete){
		/* mnt is non-nil only if the loop above found a->old mounted */
		if(mnt != nil){
			fprint(a->fd, "snap: snap is mounted: '%s'\n", a->old);
			goto Error;
		}
		if((e = delsnap(t, t->succ, a->old)) != nil){
			/* the snapshot being deleted is a->old, not a->new */
			fprint(a->fd, "snap: error deleting '%s': %s\n", a->old, e);
			goto Error;
		}
	}else{
		if((s = opensnap(a->new, nil)) != nil){
			fprint(a->fd, "snap: already exists '%s'\n", a->new);
			closesnap(s);
			goto Error;
		}
		if((e = tagsnap(t, a->new, a->mutable)) != nil){
			fprint(a->fd, "snap: error creating '%s': %s\n", a->new, e);
			goto Error;
		}
	}
	closesnap(t);
	unlock(&fs->mountlk);
	/* we probably want explicit snapshots to get synced */
	if(a->delete)
		fprint(a->fd, "deleted: %s\n", a->old);
	else if(a->mutable)
		fprint(a->fd, "forked: %s from %s\n", a->new, a->old);
	else
		fprint(a->fd, "labeled: %s from %s\n", a->new, a->old);
	return;

Error:
	/*
	 * release the reference on t taken above, exactly as the
	 * success path does, so that failures do not leak it.
	 */
	closesnap(t);
	unlock(&fs->mountlk);
}

/*
 * Fills d with the synthetic directory entry used for the
 * root of the snapshot dump: a read/execute-only directory
 * named "/" with qid path Qdump, versioned by fs->nextgen
 * and owned by uid/gid/muid -1.
 */
static void
filldumpdir(Xdir *d)
{
	/* zeroing also leaves atime, mtime and length at 0 */
	memset(d, 0, sizeof(Xdir));
	d->qid.type = QTDIR;
	d->qid.path = Qdump;
	d->qid.vers = fs->nextgen;
	d->name = "/";
	d->mode = 0555;
	d->muid = -1;
	d->gid = -1;
	d->uid = -1;
}

/*
 * Validates a file name.  Returns 0 if name is non-empty,
 * is neither "." nor "..", is terminated within Maxname
 * bytes, and contains no '/' and no byte below 0x20.
 * Returns -1 otherwise.
 */
static int
okname(char *name)
{
	char *p;
	int left;

	if(*name == '\0')
		return -1;
	if(strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
		return -1;
	p = name;
	for(left = Maxname; left > 0; left--){
		if(*p == '\0')
			return 0;
		if(*p == '/' || (*p & 0xff) < 0x20)
			return -1;
		p++;
	}
	/* no terminator within Maxname bytes: too long */
	return -1;
}

/*
 * Allocates a zeroed channel with room for size pointers.
 * All slots start out available and both the read and the
 * write pointer start at the first slot.  Allocation
 * failure is fatal.
 */
Chan*
mkchan(int size)
{
	Chan *ch;

	ch = mallocz(sizeof(Chan) + size*sizeof(void*), 1);
	if(ch == nil)
		sysfatal("create channel");
	ch->rp = ch->args;
	ch->wp = ch->args;
	ch->count = 0;
	ch->avail = size;
	ch->size = size;
	return ch;
}

/*
 * Receives one pointer from channel c.  A filled slot is
 * claimed through c->count, the value at the read pointer
 * is taken under c->rl, and one unit is then released on
 * c->avail.
 */
void*
chrecv(Chan *c)
{
	void *a;
	long v;

	/*
	 * first try to decrement count with one compare-and-swap;
	 * if it reads as zero or the swap fails, fall back to
	 * semacquire on the same counter.
	 */
	v = agetl(&c->count);
	if(v == 0 || !acasl(&c->count, v, v-1))
		semacquire(&c->count, 1);
	lock(&c->rl);
	a = *c->rp;
	/* advance the read pointer, wrapping at the end of args */
	if(++c->rp >= &c->args[c->size])
		c->rp = c->args;
	unlock(&c->rl);
	semrelease(&c->avail, 1);
	return a;
}

/*
 * Sends pointer m on channel c.  A free slot is claimed
 * through c->avail, m is stored at the write pointer under
 * c->wl, and one unit is then released on c->count.
 */
void
chsend(Chan *c, void *m)
{
	long v;

	/*
	 * first try to decrement avail with one compare-and-swap;
	 * if it reads as zero or the swap fails, fall back to
	 * semacquire on the same counter.
	 */
	v = agetl(&c->avail);
	if(v == 0 || !acasl(&c->avail, v, v-1))
		semacquire(&c->avail, 1);
	lock(&c->wl);
	*c->wp = m;
	/* advance the write pointer, wrapping at the end of args */
	if(++c->wp >= &c->args[c->size])
		c->wp = c->args;
	unlock(&c->wl);
	semrelease(&c->count, 1);
}

/*
 * Formats a message, prints it on standard error and
 * closes both file descriptors of connection c.
 */
static void
fshangup(Conn *c, char *fmt, ...)
{
	char msg[ERRMAX];
	va_list args;

	va_start(args, fmt);
	vsnprint(msg, sizeof(msg), fmt, args);
	va_end(args);

	fprint(2, "%s\n", msg);

	close(c->rfd);
	close(c->wfd);
}

/*
 * Sends reply r for request m and frees m.  The reply
 * takes the request's tag and is written under the
 * connection's write lock; a short write hangs up the
 * connection.  Afterwards the flush-queue lock hashed from
 * the tag is released: the write side (keyed by oldtag)
 * for a Tflush, the read side for anything else.
 */
static void
respond(Fmsg *m, Fcall *r)
{
	uchar buf[Max9p+IOHDRSZ];
	int nwr, len;
	Conn *c;

	r->tag = m->tag;
	dprint("→ %F\n", r);
	assert(m->type+1 == r->type || r->type == Rerror);
	len = convS2M(r, buf, sizeof(buf));
	if(len == 0)
		abort();

	c = m->conn;
	qlock(&c->wrlk);
	nwr = write(c->wfd, buf, len);
	qunlock(&c->wrlk);
	if(nwr != len)
		fshangup(c, Eio);

	if(m->type == Tflush)
		wunlock(&fs->flushq[ihash(m->oldtag) % Nflushtab]);
	else
		runlock(&fs->flushq[ihash(m->tag) % Nflushtab]);
	free(m);
}

/*
 * Replies to request m with an Rerror whose message is
 * built from fmt and the following arguments.  The message
 * is truncated to fit a 128-byte buffer.  m is consumed by
 * respond.
 */
static void
rerror(Fmsg *m, char *fmt, ...)
{
	char ename[128];
	va_list args;
	Fcall reply;

	va_start(args, fmt);
	vsnprint(ename, sizeof(ename), fmt, args);
	va_end(args);

	reply.ename = ename;
	reply.type = Rerror;
	respond(m, &reply);
}


/*
 * Looks up key k in the current root tree of mount mnt,
 * storing the result in kv with buf/nbuf as backing
 * storage.  Returns nil on success or an error string;
 * a nil mount yields Eattach.
 */
static char*
lookup(Mount *mnt, Key *k, Kvp *kv, char *buf, int nbuf)
{
	Tree *root;

	if(mnt == nil)
		return Eattach;
	root = agetp(&mnt->root);
	return btlookup(root, k, kv, buf, nbuf);
}

/*
 * Applies the nm messages in m to the root tree of mount
 * mnt.  Returns Erdonly for an immutable mount.  When the
 * root's label count is not 1 or its reference count is
 * not 0, updatesnap is run on the mount first and its
 * error, if any, is returned.  Otherwise returns whatever
 * btupsert returns.
 */
static char*
upsert(Mount *mnt, Msg *m, int nm)
{
	char *err;
	int refresh;

	if(!mnt->mutable)
		return Erdonly;
	refresh = mnt->root->nlbl != 1 || mnt->root->nref != 0;
	if(refresh){
		err = updatesnap(&mnt->root, mnt->root, mnt->name);
		if(err != nil)
			return err;
	}
	return btupsert(mnt->root, m, nm);
}

/*
 * Reads up to n bytes at offset o of the file behind fid f
 * into d, without crossing a Blksz block boundary.  sz is
 * the file length: offsets at or past it read as 0 bytes.
 * A block with no entry in the tree (Esrch) reads as
 * zeros.  Returns the number of bytes produced, or -1 with
 * the error string set.
 */
static int
readb(Fid *f, char *d, vlong o, vlong n, vlong sz)
{
	char *e, buf[17], kvbuf[17+32];
	vlong fb, fo;
	Bptr bp;
	Blk *b;
	Key k;
	Kvp kv;

	if(o >= sz)
		return 0;

	/* split the offset into block start and offset within the block */
	fb = o & ~(Blksz-1);
	fo = o & (Blksz-1);
	if(fo+n > Blksz)
		n = Blksz-fo;

	/* data key: Kdat tag, qid path, block offset */
	k.k = buf;
	k.nk = sizeof(buf);
	k.k[0] = Kdat;
	PACK64(k.k+1, f->qpath);
	PACK64(k.k+9, fb);

	e = lookup(f->mnt, &k, &kv, kvbuf, sizeof(kvbuf));
	if(e != nil){
		if(e != Esrch){
			/* e is data, not a format: never pass it as one */
			werrstr("%s", e);
			return -1;
		}
		memset(d, 0, n);
		return n;
	}

	bp = unpackbp(kv.v, kv.nv);
	if((b = getblk(bp, GBraw)) == nil)
		return -1;
	memcpy(d, b->buf+fo, n);
	dropblk(b);
	return n;
}

/*
 * Writes up to n bytes from s at offset o of the file
 * behind fid f, without crossing a Blksz block boundary.
 * A new data block is always allocated; when the write
 * covers only part of an existing block below the file
 * length sz, the old contents are copied in first.  The
 * old block pointer, if any, is freed.  The key for the
 * block is packed into m->k and the new block pointer into
 * m->v, and also stored through ret; the caller is
 * expected to upsert m.  Returns the number of bytes
 * written or -1.
 *
 * NOTE(review): the early "return -1" paths taken after
 * newdblk succeeded do not appear to release b -- confirm
 * whether that leaks the new block.
 */
static int
writeb(Fid *f, Msg *m, Bptr *ret, char *s, vlong o, vlong n, vlong sz)
{
	char *e, buf[Kvmax];
	vlong fb, fo;
	Blk *b, *t;
	Bptr bp;
	Kvp kv;

	/* split the offset into block start and offset within the block */
	fb = o & ~(Blksz-1);
	fo = o & (Blksz-1);

	m->k[0] = Kdat;
	PACK64(m->k+1, f->qpath);
	PACK64(m->k+9, fb);

	b = newdblk(f->mnt->root, Tdat, f->qpath);
	if(b == nil)
		return -1;
	t = nil;
	e = lookup(f->mnt, m, &kv, buf, sizeof(buf));
	if(e == nil){
		bp = unpackbp(kv.v, kv.nv);
		/* partial overwrite of live data: start from the old block */
		if(fb < sz && (fo != 0 || n != Blksz)){
			if((t = getblk(bp, GBraw)) == nil)
				return -1;
			memcpy(b->buf, t->buf, Blksz);
			dropblk(t);
		}
		freeblk(f->mnt->root, nil, bp);
	}else if(e != Esrch){
		werrstr("%s", e);
		return -1;
	}
	if(fo+n > Blksz)
		n = Blksz-fo;
	memcpy(b->buf+fo, s, n);
	/* nothing was copied from an old block: zero the rest */
	if(t == nil){
		if(fo > 0)
			memset(b->buf, 0, fo);
		if(fo+n < Blksz)
			memset(b->buf+fo+n, 0, Blksz-fo-n);
	}
	enqueue(b);

	packbp(m->v, m->nv, &b->bp);
	*ret = b->bp;
	dropblk(b);
	return n;
}

/*
 * Returns a referenced Dent for directory entry d, whose
 * parent has qid path pqid.  An existing entry with the
 * same qid path in fs->dtab gets its reference count
 * bumped; otherwise a new entry with one reference is
 * built from d and inserted.  Returns nil if allocation or
 * key packing fails.  The table is protected by
 * fs->dtablk.
 */
static Dent*
getdent(vlong pqid, Xdir *d)
{
	Dent *de;
	char *end;
	u32int h;

	h = ihash(d->qid.path) % Ndtab;
	lock(&fs->dtablk);
	for(de = fs->dtab[h]; de != nil; de = de->next)
		if(de->qid.path == d->qid.path)
			break;
	if(de != nil){
		ainc(&de->ref);
	}else if((de = mallocz(sizeof(Dent), 1)) != nil){
		de->Xdir = *d;
		de->ref = 1;
		de->up = pqid;
		de->qid = d->qid;
		de->length = d->length;

		end = packdkey(de->buf, sizeof(de->buf), pqid, d->name);
		if(end == nil){
			free(de);
			de = nil;
		}else{
			de->k = de->buf;
			de->nk = end - de->buf;
			/* the name is taken from inside the packed key */
			de->name = de->buf + 11;
			de->next = fs->dtab[h];
			fs->dtab[h] = de;
		}
	}
	unlock(&fs->dtablk);
	return de;
}

/*
 * Returns a referenced mount for the snapshot called name.
 * "dump" maps to fs->snapmnt.  A mount already present in
 * fs->mounts is reused; otherwise the snapshot is opened
 * and a new mount with one reference is linked in.
 * Returns nil on allocation failure or, with the error
 * string set to Enosnap, when the snapshot cannot be
 * opened.
 */
Mount *
getmount(char *name)
{
	Mount *mnt;
	Tree *root;

	if(strcmp(name, "dump") == 0){
		ainc(&fs->snapmnt->ref);
		return fs->snapmnt;
	}

	lock(&fs->mountlk);
	for(mnt = fs->mounts; mnt != nil; mnt = mnt->next)
		if(strcmp(name, mnt->name) == 0)
			break;
	if(mnt != nil){
		ainc(&mnt->ref);
		goto Out;
	}

	mnt = mallocz(sizeof(*mnt), 1);
	if(mnt == nil)
		goto Out;
	mnt->ref = 1;
	mnt->name = strdup(name);
	if(mnt->name == nil){
		free(mnt);
		mnt = nil;
		goto Out;
	}
	root = opensnap(name, &mnt->mutable);
	if(root == nil){
		werrstr("%s", Enosnap);
		free(mnt->name);
		free(mnt);
		mnt = nil;
		goto Out;
	}
	mnt->root = root;
	mnt->next = fs->mounts;
	fs->mounts = mnt;

Out:
	unlock(&fs->mountlk);
	return mnt;
}

/*
 * Drops one reference on mnt (nil is ignored).  When the
 * count reaches zero the mount is unlinked from
 * fs->mounts, which it is asserted to be on, and freed
 * along with its name.
 */
void
clunkmount(Mount *mnt)
{
	Mount *cur, **link;

	if(mnt == nil)
		return;
	lock(&fs->mountlk);
	if(adec(&mnt->ref) != 0){
		unlock(&fs->mountlk);
		return;
	}
	link = &fs->mounts;
	while((cur = *link) != nil && cur != mnt)
		link = &cur->next;
	assert(cur != nil);
	*link = cur->next;
	free(mnt->name);
	free(mnt);
	unlock(&fs->mountlk);
}

/*
 * Drops one reference on de (nil is ignored) and frees it
 * when the count reaches zero.  Dents with qid type QTAUTH
 * are only dereferenced and freed here, without touching
 * fs->dtab; all others are also unlinked from their
 * fs->dtab bucket under fs->dtablk.
 */
static void
clunkdent(Dent *de)
{
	Dent *e, **pe;
	u32int h;

	if(de == nil)
		return;
	if(de->qid.type == QTAUTH){
		/*
		 * always return here: falling through would drop a
		 * second reference and then fail the table search below.
		 */
		if(adec(&de->ref) == 0)
			free(de);
		return;
	}
	lock(&fs->dtablk);
	if(adec(&de->ref) != 0)
		goto Out;
	h = ihash(de->qid.path) % Ndtab;
	pe = &fs->dtab[h];
	for(e = fs->dtab[h]; e != nil; e = e->next){
		if(e == de)
			break;
		pe = &e->next;
	}
	assert(e != nil);
	*pe = e->next;
	free(de);
Out:
	unlock(&fs->dtablk);
}

void
showfid(int fd, char**, int)
{
	int i;
	Fid *f;
	Conn *c;

	for(c = fs->conns; c != nil; c = c->next){
		fprint(fd, "fids:\n");
		for(i = 0; i < Nfidtab; i++){
			lock(&c->fidtablk[i]);
			for(f = c->fidtab[i]; f != nil; f = f->next){
				rlock(f->dent);
				fprint(fd, "\tfid[%d] from %#zx: %d [refs=%ld, k=%K, qid=%Q]\n",
					i, getmalloctag(f), f->fid, f->dent->ref, &f->dent->Key, f->dent->qid);
				runlock(f->dent);
			}
			unlock(&c->fidtablk[i]);
		}
	}
}

/*
 * Finds fid number fid in connection c's fid table and
 * returns it with one extra reference, or nil if there is
 * no such fid.
 */
static Fid*
getfid(Conn *c, u32int fid)
{
	u32int bkt;
	Fid *f;

	bkt = ihash(fid) % Nfidtab;
	lock(&c->fidtablk[bkt]);
	f = c->fidtab[bkt];
	while(f != nil && f->fid != fid)
		f = f->next;
	if(f != nil)
		ainc(&f->ref);
	unlock(&c->fidtablk[bkt]);
	return f;
}

/*
 * Drops one reference on f.  On the last reference the
 * fid's mount and dent references are released and the
 * fid is freed.
 */
static void
putfid(Fid *f)
{
	if(adec(&f->ref) == 0){
		clunkmount(f->mnt);
		clunkdent(f->dent);
		free(f);
	}
}

/*
 * Creates fid number new on connection c as a copy of f
 * and inserts it into the fid table.  The copy starts
 * unopened (mode -1) with two references: one owned by
 * the caller, one dropped at clunk.  It takes its own
 * references on f's mount (if any) and dent.  Returns nil
 * on allocation failure or when new is already in use.
 */
static Fid*
dupfid(Conn *c, u32int new, Fid *f)
{
	Fid *n, *o;
	u32int h;

	h = ihash(new) % Nfidtab;
	if((n = malloc(sizeof(Fid))) == nil)
		return nil;

	*n = *f;
	n->fid = new;
	n->ref = 2; /* one for dup, one for clunk */
	n->mode = -1;
	n->next = nil;
	/*
	 * the scan state is freed when its fid is clunked, so the
	 * copy must not share the original's pointer: clunking
	 * both fids would otherwise free it twice.
	 */
	n->scan = nil;

	lock(&c->fidtablk[h]);
	for(o = c->fidtab[h]; o != nil; o = o->next)
		if(o->fid == new)
			break;
	if(o == nil){
		n->next = c->fidtab[h];
		c->fidtab[h] = n;
	}
	unlock(&c->fidtablk[h]);

	if(o != nil){
		fprint(2, "fid in use: %d == %d\n", o->fid, new);
		free(n);
		return nil;
	}
	if(n->mnt != nil)
		ainc(&n->mnt->ref);
	ainc(&n->dent->ref);
	setmalloctag(n, getcallerpc(&c));
	return n;
}

/*
 * Unlinks fid from connection c's fid table and drops the
 * table's reference on it.  The caller must hold another
 * reference (the count is asserted not to reach zero
 * here) and the fid must be in the table.
 */
static void
clunkfid(Conn *c, Fid *fid)
{
	Fid *cur, **link;
	u32int bkt;
	long left;

	bkt = ihash(fid->fid) % Nfidtab;
	lock(&c->fidtablk[bkt]);
	link = &c->fidtab[bkt];
	while((cur = *link) != nil && cur != fid)
		link = &cur->next;
	if(cur != nil){
		left = adec(&cur->ref);
		assert(left != 0);
		*link = cur->next;
	}
	assert(cur != nil);
	unlock(&c->fidtablk[bkt]);
}

/*
 * Reads one 9P message from connection c into a freshly
 * allocated Fmsg stored through pm.  Returns 0 on success.
 * On end of file or a read error on the size prefix, *pm
 * is set to nil and readn's result (0 or negative) is
 * returned.  Returns -1 with the error string set for a
 * short read, or for a size that is smaller than a 9P
 * header or larger than c->iounit.
 */
static int
readmsg(Conn *c, Fmsg **pm)
{
	/* unsigned bytes: GBIT32 must not sign-extend the size */
	uchar szbuf[4];
	int sz, n;
	Fmsg *m;

	n = readn(c->rfd, szbuf, 4);
	if(n <= 0){
		*pm = nil;
		return n;
	}
	if(n != 4){
		werrstr("short read: %r");
		return -1;
	}
	sz = GBIT32(szbuf);
	/*
	 * the size comes off the wire: anything shorter than
	 * size[4] type[1] tag[2], or negative after conversion,
	 * would give readn a negative length below.
	 */
	if(sz < BIT32SZ+BIT8SZ+BIT16SZ){
		werrstr("message size too small");
		return -1;
	}
	if(sz > c->iounit){
		werrstr("message size too large");
		return -1;
	}
	if((m = malloc(sizeof(Fmsg)+sz)) == nil)
		return -1;
	if(readn(c->rfd, m->buf+4, sz-4) != sz-4){
		werrstr("short read: %r");
		free(m);
		return -1;
	}
	m->conn = c;
	m->sz = sz;
	/* put the size prefix back so buf holds the whole message */
	PBIT32(m->buf, sz);
	*pm = m;
	return 0;
}

/*
 * Handles Tversion.  Anything from the first '.' on in the
 * requested version is cut off.  For "9P2000" the
 * connection is marked versioned and its iounit is set to
 * the smaller of the client's msize and Max9p+IOHDRSZ;
 * any other version is answered with "unknown" and the
 * connection is marked unversioned.
 */
static void
fsversion(Fmsg *m)
{
	Fcall reply;
	char *dot;
	Conn *c;

	c = m->conn;
	memset(&reply, 0, sizeof(Fcall));
	reply.type = Rversion;
	reply.msize = Max9p + IOHDRSZ;

	dot = strchr(m->version, '.');
	if(dot != nil)
		*dot = '\0';

	if(strcmp(m->version, "9P2000") != 0){
		reply.version = "unknown";
		c->versioned = 0;
	}else{
		if(m->msize < reply.msize)
			reply.msize = m->msize;
		reply.version = "9P2000";
		c->versioned = 1;
		c->iounit = reply.msize;
	}
	respond(m, &reply);
}

/*
 * Closes the file descriptor of an auth RPC handle and
 * frees the handle.  A nil handle is ignored.
 */
void
authfree(AuthRpc *auth)
{
	if(auth == nil)
		return;
	close(auth->afd);
	auth_freerpc(auth);
}

/*
 * Opens a new factotum RPC session and starts the p9any
 * server protocol on it.  If /mnt/factotum is not
 * accessible, /srv/factotum is mounted before /mnt first.
 * Returns the RPC handle, or nil if the rpc file cannot be
 * opened, the handle cannot be allocated, or the "start"
 * request is not accepted.
 */
AuthRpc*
authnew(void)
{
	static char *keyspec = "proto=p9any role=server";
	AuthRpc *rpc;
	int fd;

	if(access("/mnt/factotum", 0) < 0){
		fd = open("/srv/factotum", ORDWR);
		if(fd >= 0)
			mount(fd, -1, "/mnt", MBEFORE, "");
	}
	fd = open("/mnt/factotum/rpc", ORDWR);
	if(fd < 0)
		return nil;
	rpc = auth_allocrpc(fd);
	if(rpc == nil){
		close(fd);
		return nil;
	}
	if(auth_rpc(rpc, "start", keyspec, strlen(keyspec)) == ARok)
		return rpc;
	authfree(rpc);
	return nil;
}

/*
 * Performs one "read" step of the auth conversation on
 * fid f.  If the RPC has data (ARok) it is copied to data
 * and r->count is set; Eauthd is returned when count is
 * too small for it.  If the conversation is done, the
 * authenticated user is looked up and stored in f->uid.
 * Returns nil on success, otherwise an error string:
 * Etype when f has no RPC handle, Eauthph for a phase
 * error or missing auth info, Enouser for an unknown
 * user, Eauthp for any other RPC result.
 */
static char*
authread(Fid *f, Fcall *r, void *data, vlong count)
{
	AuthInfo *ai;
	AuthRpc *rpc;
	User *u;
	int rc;

	rpc = f->auth;
	if(rpc == nil)
		return Etype;

	rc = auth_rpc(rpc, "read", nil, 0);
	if(rc == ARok){
		if(count < rpc->narg)
			return Eauthd;
		memmove(data, rpc->arg, rpc->narg);
		r->count = rpc->narg;
		return nil;
	}
	if(rc == ARphase)
		return Eauthph;
	if(rc != ARdone)
		return Eauthp;

	/* conversation finished: bind the authenticated user to the fid */
	ai = auth_getinfo(rpc);
	if(ai == nil)
		return Eauthph;
	rlock(&fs->userlk);
	u = name2user(ai->cuid);
	auth_freeAI(ai);
	if(u == nil){
		runlock(&fs->userlk);
		return Enouser;
	}
	f->uid = u->id;
	runlock(&fs->userlk);
	return nil;
}

/*
 * Passes count bytes of data to the auth RPC of fid f as
 * a "write" step.  On success r is filled in as an Rwrite
 * for count bytes and nil is returned.  Returns Etype when
 * f has no RPC handle and Ebotch when the RPC does not
 * answer ARok.
 */
static char*
authwrite(Fid *f, Fcall *r, void *data, vlong count)
{
	AuthRpc *rpc;

	rpc = f->auth;
	if(rpc == nil)
		return Etype;
	if(auth_rpc(rpc, "write", data, count) == ARok){
		r->type = Rwrite;
		r->count = count;
		return nil;
	}
	return Ebotch;
}

/*
 * Handles Tauth.  Refused with Eauth when fs->noauth is
 * set.  Otherwise a fresh QTAUTH dent with a new qid path
 * is created, a factotum RPC session is started, and both
 * are attached to a new fid numbered m->afid.  The reply
 * carries the auth qid.
 */
static void
fsauth(Fmsg *m)
{
	Dent *de;
	Fcall r;
	Fid f, *af;

	if(fs->noauth){
		rerror(m, Eauth);
		return;
	}
	/* mallocz zeroes the dent; only non-zero fields are set below */
	if((de = mallocz(sizeof(Dent), 1)) == nil){
		rerror(m, Enomem);
		return;
	}
	/* ref starts at 0: dupfid takes the fid's reference */
	de->qid.type = QTAUTH;
	de->qid.path = aincv(&fs->nextqid, 1);

	memset(&f, 0, sizeof(Fid));
	f.fid = NOFID;
	f.mnt = nil;
	f.qpath = de->qid.path;
	f.pqpath = de->qid.path;
	f.mode = -1;
	f.iounit = m->conn->iounit;
	f.dent = de;
	f.uid = -1;
	f.duid = -1;
	f.dgid = -1;
	f.dmode = 0600;
	f.auth = authnew();
	if((af = dupfid(m->conn, m->afid, &f)) == nil){
		rerror(m, Efid);
		/* nothing else owns the rpc handle yet; authfree accepts nil */
		authfree(f.auth);
		free(de);
		return;
	}
	r.type = Rauth;
	r.aqid = de->qid;
	respond(m, &r);
	/*
	 * dupfid returns the fid with a reference for its caller;
	 * drop it so that clunking the fid can free it.
	 */
	putfid(af);
}

/*
 * Reports whether user uid is listed as a member of group
 * gid.  Returns 1 if so, 0 otherwise, including when
 * either id is unknown.  The user table is read under
 * fs->userlk.
 */
static int
ingroup(int uid, int gid)
{
	User *usr, *grp;
	int i, found;

	found = 0;
	rlock(&fs->userlk);
	usr = uid2user(uid);
	grp = uid2user(gid);
	if(usr != nil && grp != nil){
		for(i = 0; i < grp->nmemb && !found; i++)
			if(grp->memb[i] == usr->id)
				found = 1;
	}
	runlock(&fs->userlk);
	return found;
}

/*
 * Reports whether user uid leads group gid.  When the
 * group's lead field is 0 every member counts as leader;
 * otherwise only the user named by lead does.  Returns 1
 * or 0; an unknown group yields 0.
 */
static int
groupleader(int uid, int gid)
{
	User *grp;
	int i, lead;

	lead = 0;
	rlock(&fs->userlk);
	grp = uid2user(gid);
	if(grp != nil){
		if(grp->lead != 0){
			if(grp->lead == uid)
				lead = 1;
		}else{
			for(i = 0; i < grp->nmemb && !lead; i++)
				if(grp->memb[i] == uid)
					lead = 1;
		}
	}
	runlock(&fs->userlk);
	return lead;
}

/*
 * Converts a 9P open mode into the DMREAD/DMWRITE/DMEXEC
 * permission bits it requires.  OEXEC needs read and
 * execute; OTRUNC additionally needs write.  An access
 * value other than OREAD, OWRITE, ORDWR or OEXEC
 * contributes no bits.
 */
static int
mode2bits(int req)
{
	int acc, bits;

	acc = req & 0xf;
	if(acc == OREAD)
		bits = DMREAD;
	else if(acc == OWRITE)
		bits = DMWRITE;
	else if(acc == ORDWR)
		bits = DMREAD|DMWRITE;
	else if(acc == OEXEC)
		bits = DMREAD|DMEXEC;
	else
		bits = 0;
	if(req & OTRUNC)
		bits |= DMWRITE;
	return bits;
}

/*
 * Checks whether the user of fid f may access a file with
 * mode fmode, owner fuid and group fgid using the
 * permission bits in m (DMREAD, DMWRITE, DMEXEC).
 * Returns 0 if allowed, -1 if not.  Everything is allowed
 * when the global permissive flag is set.  Owner and
 * group permissions apply only to uids other than 0.
 * "Other" permissions are withheld from members of group
 * 9999, except for searching (DMEXEC on) a directory.
 */
static int
fsaccess(Fid *f, ulong fmode, int fuid, int fgid, int m)
{
	/* uid none gets only other permissions */
	if(permissive)
		return 0;
	if(f->uid != 0) {
		if(f->uid == fuid)
			if((m & (fmode>>6)) == m)
				return 0;
		if(ingroup(f->uid, fgid))
			if((m & (fmode>>3)) == m)
				return 0;
	}
	/*
	 * require every requested bit, as the owner and group
	 * checks do; testing for any overlap would let a
	 * read+write request through on a read-only file.
	 */
	if((m & fmode) == m) {
		if((fmode & DMDIR) && (m == DMEXEC))
			return 0;
		if(!ingroup(f->uid, 9999))
			return 0;
	}
	return -1;
}

/*
 * Handles Tattach.  The attach name (default "main")
 * selects the mount; the user name must exist in the user
 * table.  When an afid is given, its auth conversation
 * must have finished for the same user; without an afid
 * the attach is only allowed when fs->noauth is set.  The
 * root directory entry (a synthetic one for "dump") is
 * looked up and bound to a new fid numbered m->fid.
 */
static void
fsattach(Fmsg *m)
{
	char *e, *p, *n, dbuf[Kvmax], kvbuf[Kvmax];
	Mount *mnt;
	Dent *de;
	Tree *t;
	User *u;
	Fcall r;
	Xdir d;
	Kvp kv;
	Key dk;
	Fid f, *af, *nf;
	int uid;

	de = nil;
	if(m->aname[0] == '\0')
		m->aname = "main";
	if((mnt = getmount(m->aname)) == nil){
		rerror(m, "%r");
		goto Out;
	}

	rlock(&fs->userlk);
	n = m->uname;
	/*
	 * to allow people to add themselves to the user file,
	 * we need to force the user id to one that exists.
	 */
	if(permissive && strcmp(m->aname, "adm") == 0)
		n = "adm";
	if((u = name2user(n)) == nil){
		runlock(&fs->userlk);
		rerror(m, Enouser);
		goto Out;
	}
	uid = u->id;
	runlock(&fs->userlk);

	if(m->afid != NOFID){
		r.data = nil;
		r.count = 0;
		if((af = getfid(m->conn, m->afid)) == nil){
			rerror(m, Enofid);
			goto Out;
		}
		if((e = authread(af, &r, nil, 0)) != nil){
			rerror(m, e);
			putfid(af);
			goto Out;
		}
		if(af->uid != uid){
			rerror(m, Ebadu);
			putfid(af);
			goto Out;
		}
		putfid(af);
	}else if(!fs->noauth){
		rerror(m, Ebadu);
		goto Out;
	}

	if(strcmp(m->aname, "dump") == 0){
		/* filldumpdir clears d itself */
		filldumpdir(&d);
	}else{
		if((p = packdkey(dbuf, sizeof(dbuf), -1ULL, "")) == nil){
			rerror(m, Elength);
			goto Out;
		}
		dk.k = dbuf;
		dk.nk = p - dbuf;
		t = agetp(&mnt->root);
		if((e = btlookup(t, &dk, &kv, kvbuf, sizeof(kvbuf))) != nil){
			rerror(m, e);
			goto Out;
		}
		if(kv2dir(&kv, &d) == -1){
			rerror(m, Efs);
			goto Out;
		}
	}
	if((de = getdent(-1, &d)) == nil){
		rerror(m, Efs);
		goto Out;
	}

	memset(&f, 0, sizeof(Fid));
	f.fid = NOFID;
	f.mnt = mnt;
	f.qpath = d.qid.path;
	f.pqpath = d.qid.path;
	f.mode = -1;
	f.iounit = m->conn->iounit;
	f.dent = de;
	f.uid = uid;
	f.duid = d.uid;
	f.dgid = d.gid;
	f.dmode = d.mode;
	if((nf = dupfid(m->conn, m->fid, &f)) == nil){
		rerror(m, Efid);
		goto Out;
	}

	r.type = Rattach;
	r.qid = d.qid;
	respond(m, &r);
	/*
	 * dupfid returns the fid with a reference for its caller;
	 * drop it so that clunking the fid can free it.
	 */
	putfid(nf);

Out:
	/* the new fid took its own dent and mount references in dupfid */
	clunkdent(de);
	clunkmount(mnt);
}

/*
 * Finds the directory entry key of fid f's parent
 * directory by looking up the super key for f->pqpath.
 * On success *name points at the parent's name inside buf
 * and *qpath receives the qid path unpacked from the same
 * key.  Returns nil on success, otherwise Elength, the
 * lookup error, or Efs if the value cannot be unpacked.
 */
static char*
findparent(Fid *f, vlong *qpath, char **name, char *buf, int nbuf)
{
	char *end, *err, kbuf[Keymax];
	Kvp kv;
	Key k;

	end = packsuper(kbuf, sizeof(kbuf), f->pqpath);
	if(end == nil)
		return Elength;
	k.k = kbuf;
	k.nk = end - kbuf;
	err = lookup(f->mnt, &k, &kv, buf, nbuf);
	if(err != nil)
		return err;
	*name = unpackdkey(kv.v, kv.nv, qpath);
	if(*name == nil)
		return Efs;
	return nil;
}

/*
 * Handles Twalk.  Starting from fid m->fid, which must not
 * be open, each name in m->wname is looked up in turn;
 * execute permission is required on every directory
 * walked through.  Names below the dump root select a
 * snapshot mount, and ".." is resolved through the
 * parent's super key.  The walk stops at the first name
 * that cannot be found: if that is the first name an
 * error is returned, otherwise the qids collected so far
 * are.  Only when every name was walked is the
 * destination fid (a new fid if newfid differs from fid)
 * moved to the final entry.
 *
 * NOTE(review): a mount obtained through getmount inside
 * the loop does not appear to be released on the later
 * error returns, and is referenced again by ainc below --
 * confirm the intended reference counting.
 */
static void
fswalk(Fmsg *m)
{
	char *p, *e, *name, kbuf[Maxent], kvbuf[Kvmax];
	int duid, dgid, dmode;
	vlong up, prev;
	Fid *o, *f;
	Dent *dent;
	Mount *mnt;
	Fcall r;
	Xdir d;
	Kvp kv;
	Key k;
	int i;

	if((o = getfid(m->conn, m->fid)) == nil){
		rerror(m, Enofid);
		return;
	}
	if(o->mode != -1){
		rerror(m, Einuse);
		putfid(o);
		return;
	}
	e = nil;
	mnt = o->mnt;
	up = o->qpath;
	prev = o->qpath;
	rlock(o->dent);
	d = *o->dent;
	runlock(o->dent);
	duid = d.uid;
	dgid = d.gid;
	dmode = d.mode;
	r.type = Rwalk;
	for(i = 0; i < m->nwname; i++){
		/* d is the directory being searched in this step */
		if(fsaccess(o, d.mode, d.uid, d.gid, DMEXEC) != 0){
			rerror(m, Eperm);
			putfid(o);
			return;
		}
		name = m->wname[i];
		if(d.qid.path == Qdump){
			/* below the dump root a name selects a snapshot; look up its root */
			if((mnt = getmount(m->wname[i])) == nil){
				rerror(m, Esrch);
				putfid(o);
				return;
			}
			if((p = packdkey(kbuf, sizeof(kbuf), -1ULL, "")) == nil){
				rerror(m, Elength);
				clunkmount(mnt);
				putfid(o);
				return;
			}
		}else{
			if(strcmp(m->wname[i], "..") == 0){
				if(o->pqpath == Qdump){
					/* ".." from a snapshot root leads back to the dump directory */
					mnt = fs->snapmnt;
					filldumpdir(&d);
					duid = d.uid;
					dgid = d.gid;
					dmode = d.mode;
					goto Found;
				}else if((e = findparent(o, &prev, &name, kbuf, sizeof(kbuf))) != nil){
					rerror(m, e);
					putfid(o);
					return;
				}
			}
			if((p = packdkey(kbuf, sizeof(kbuf), prev, name)) == nil){
				rerror(m, Elength);
				putfid(o);
				return;
			}
		}
		/* remember the searched directory's ownership before d is replaced */
		duid = d.uid;
		dgid = d.gid;
		dmode = d.mode;
		k.k = kbuf;
		k.nk = p - kbuf;
		/* a failed lookup ends the walk; e is reported only if i is still 0 */
		if((e = lookup(mnt, &k, &kv, kvbuf, sizeof(kvbuf))) != nil)
			break;
		if(kv2dir(&kv, &d) == -1){
			rerror(m, Efs);
			putfid(o);
			return;
		}
Found:
		up = prev;
		prev = d.qid.path;
		r.wqid[i] = d.qid;
	}
	r.nwqid = i;
	if(i == 0 && m->nwname != 0){
		rerror(m, e);
		putfid(o);
		return;
	}
	f = o;
	/* a new fid is created only when the whole walk succeeded */
	if(m->fid != m->newfid && i == m->nwname){
		if((f = dupfid(m->conn, m->newfid, o)) == nil){
			rerror(m, Efid);
			putfid(o);
			return;
		}
		putfid(o);
	}
	if(i > 0 && i == m->nwname){
		/* move the destination fid to the entry that was reached */
		lock(f);
		if(up == Qdump)
			dent = getdent(-1ULL, &d);
		else
			dent = getdent(up, &d);
		if(dent == nil){
			if(f != o)
				clunkfid(m->conn, f);
			rerror(m, Enomem);
			unlock(f);
			putfid(f);
			return;
		}
		if(mnt != f->mnt){
			clunkmount(f->mnt);
			ainc(&mnt->ref);
			f->mnt = mnt;
		}
		clunkdent(f->dent);
		f->qpath = r.wqid[i-1].path;
		f->pqpath = up;
		f->dent = dent;
		f->duid = duid;
		f->dgid = dgid;
		f->dmode = dmode;
		unlock(f);
	}
	respond(m, &r);
	putfid(f);
}

/*
 * Handles Tstat: converts the dent of fid m->fid into a
 * stat buffer, under the dent's read lock, and returns it.
 * Replies with an error if the fid is unknown or the
 * conversion fails.
 */
static void
fsstat(Fmsg *m)
{
	char sbuf[STATMAX];
	Fcall reply;
	Fid *f;
	int len;

	f = getfid(m->conn, m->fid);
	if(f == nil){
		rerror(m, Enofid);
		return;
	}
	rlock(f->dent);
	len = dir2statbuf(f->dent, sbuf, sizeof(sbuf));
	runlock(f->dent);
	if(len != -1){
		reply.type = Rstat;
		reply.stat = (uchar*)sbuf;
		reply.nstat = len;
		respond(m, &reply);
	}else
		rerror(m, "stat: %r");
	putfid(f);
}

/*
 * Handles Twstat.  The requested changes are validated
 * field by field against the fid's dent, an Owstat value
 * is assembled in opbuf, permissions are checked, and the
 * result is written to the tree: a rename as a clobber of
 * the old key plus an insert of the new one (and a new
 * super key for directories), anything else as a single
 * Owstat message.  When the file shrinks, an AOclear
 * message for the cut-off range is stored through ao.
 *
 * The dent is write-locked throughout.  When a truncation
 * was queued it is left locked on return; presumably the
 * consumer of the Amsg releases it -- TODO confirm.
 */
static void
fswstat(Fmsg *m, Amsg **ao)
{
	char rnbuf[Kvmax], opbuf[Kvmax], upbuf[Upksz];
	char *p, *e, strs[65535];
	int op, nm, rename, truncate;
	vlong oldlen;
	Qid old;
	Fcall r;
	Dent *de;
	Msg mb[3];
	Xdir n;
	Dir d;
	Tree *t;
	Fid *f;
	Key k;
	User *u;

	rename = 0;
	truncate = 0;
	if((f = getfid(m->conn, m->fid)) == nil){
		rerror(m, Enofid);
		return;
	}
	de = f->dent;
	wlock(de);
	if(de->gone){
		rerror(m, Ephase);
		goto Out;
	}
	if((de->qid.type & QTAUTH) || (de->qid.path & Qdump)){
		rerror(m, Emode);
		goto Out;
	}
	if(convM2D(m->stat, m->nstat, &d, strs) <= BIT16SZ){
		rerror(m, Edir);
		goto Out;
	}

	t = agetp(&f->mnt->root);
	n = de->Xdir;
	n.qid.vers++;
	/* opbuf[0] is reserved for the op bits; values follow */
	p = opbuf+1;
	op = 0;

	/* check validity of updated fields and construct Owstat message */
	if(d.qid.path != ~0 || d.qid.vers != ~0){
		if(d.qid.path != de->qid.path){
			rerror(m, Ewstatp);
			goto Out;
		}
		if(d.qid.vers != de->qid.vers){
			rerror(m, Ewstatv);
			goto Out;
		}
	}
	if(*d.name != '\0'){
		if(strcmp(d.name, de->name) != 0){
			rename = 1;
			if(okname(d.name) == -1){
				rerror(m, Ename);
				goto Out;
			}
			if(walk1(t, f->dent->up, d.name, &old, &oldlen) == 0){
				rerror(m, Eexist);
				goto Out;
			}
			n.name = d.name;
		}
	}
	if(d.length != ~0){
		if(d.length < 0){
			rerror(m, Ewstatl);
			goto Out;
		}
		if(d.length != de->length){
			if(d.length < de->length){
				if((*ao = malloc(sizeof(Amsg))) == nil){
					rerror(m, Enomem);
					goto Out;
				}
				/* the queued clear holds its own dent and mount references */
				aincl(&de->ref, 1);
				aincl(&f->mnt->ref, 1);
				(*ao)->op = AOclear;
				(*ao)->mnt = f->mnt;
				(*ao)->qpath = f->qpath;
				(*ao)->off = d.length;
				(*ao)->length = f->dent->length;
				(*ao)->dent = de;
				truncate = 1;
			}
			de->length = d.length;
			n.length = d.length;
			op |= Owsize;
			PACK64(p, n.length);
			p += 8;
		}
	}
	if(d.mode != ~0){
		if((d.mode^de->mode) & DMDIR){
			rerror(m, Ewstatd);
			goto Out;
		}
		if(d.mode & ~(DMDIR|DMAPPEND|DMEXCL|DMTMP|0777)){
			rerror(m, Ewstatb);
			goto Out;
		}
		if(d.mode != de->mode){
			n.mode = d.mode;
			n.qid.type = d.mode>>24;
			op |= Owmode;
			PACK32(p, n.mode);
			p += 4;
		}
	}
	if(d.mtime != ~0){
		n.mtime = d.mtime*Nsec;
		if(n.mtime != de->mtime){
			op |= Owmtime;
			PACK64(p, n.mtime);
			p += 8;
		}
	}
	if(*d.uid != '\0'){
		rlock(&fs->userlk);
		u = name2user(d.uid);
		if(u == nil){
			runlock(&fs->userlk);
			rerror(m, Enouser);
			goto Out;
		}
		n.uid = u->id;
		runlock(&fs->userlk);
		if(n.uid != de->uid){
			op |= Owuid;
			PACK32(p, n.uid);
			p += 4;
		}
	}
	if(*d.gid != '\0'){
		rlock(&fs->userlk);
		u = name2user(d.gid);
		if(u == nil){
			runlock(&fs->userlk);
			rerror(m, Enouser);
			goto Out;
		}
		n.gid = u->id;
		runlock(&fs->userlk);
		if(n.gid != de->gid){
			op |= Owgid;
			PACK32(p, n.gid);
			p += 4;
		}
	}
	op |= Owmuid;
	n.muid = f->uid;
	PACK32(p, n.muid);
	p += 4;

	/* check permissions */
	if(rename){
		/* renaming needs write permission on the containing directory */
		if(fsaccess(f, f->dmode, f->duid, f->dgid, DMWRITE) == -1){
			rerror(m, Eperm);
			goto Out;
		}
	}
	if(op & Owsize){
		if(fsaccess(f, de->mode, de->uid, de->gid, DMWRITE) == -1){
			rerror(m, Eperm);
			goto Out;
		}
	}
	if(op & (Owmode|Owmtime)){
		if(!permissive && f->uid != de->uid && !groupleader(f->uid, de->gid)){
			rerror(m, Ewstato);
			goto Out;
		}
	}
	if(op & Owuid){
		if(!permissive){
			rerror(m, Ewstatu);
			goto Out;
		}
	}
	if(op & Owgid){
		if(!permissive
		&& !(f->uid == de->uid && ingroup(f->uid, n.gid))
		&& !(groupleader(f->uid, de->gid) && groupleader(f->uid, n.gid))){
			rerror(m, Ewstatg);
			goto Out;
		}
	}

	/* update directory entry */
	nm = 0;
	if(rename && !de->gone){
		mb[nm].op = Oclobber;
		mb[nm].Key = de->Key;
		mb[nm].v = nil;
		mb[nm].nv = 0;
		nm++;

		mb[nm].op = Oinsert;
		if(dir2kv(f->pqpath, &n, &mb[nm], rnbuf, sizeof(rnbuf)) == -1){
			rerror(m, Efs);
			goto Out;
		}
		k = mb[nm].Key;
		nm++;

		if(de->qid.type & QTDIR){
			/* a directory's super key must point at its new entry key */
			packsuper(upbuf, sizeof(upbuf), f->qpath);
			mb[nm].op = Oinsert;
			mb[nm].k = upbuf;
			mb[nm].nk = Upksz;
			mb[nm].v = mb[nm-1].k;
			mb[nm].nv = mb[nm-1].nk;
			nm++;
		}
	}else{
		opbuf[0] = op;
		mb[nm].op = Owstat;
		mb[nm].Key = de->Key;
		mb[nm].v = opbuf;
		mb[nm].nv = p - opbuf;
		nm++;
	}
	assert(nm <= nelem(mb));
	if((e = upsert(f->mnt, mb, nm)) != nil){
		/*
		 * do not unlock de here: Out releases it (or it stays
		 * locked for the queued truncation), and unlocking at
		 * both places released the lock twice.
		 */
		rerror(m, e);
		goto Out;
	}

	de->Xdir = n;
	if(rename)
		cpkey(de, &k, de->buf, sizeof(de->buf));

	r.type = Rwstat;
	respond(m, &r);

Out:
	if(!truncate)
		wunlock(de);
	putfid(f);
}


/*
 * Handles Tclunk: discards any directory scan state held
 * by the fid, removes the fid from the connection's fid
 * table and acknowledges.
 */
static void
fsclunk(Fmsg *m)
{
	Fcall reply;
	Fid *f;

	f = getfid(m->conn, m->fid);
	if(f == nil){
		rerror(m, Enofid);
		return;
	}
	lock(f);
	/* free accepts nil, so no separate check is needed */
	free(f->scan);
	f->scan = nil;
	clunkfid(m->conn, f);
	unlock(f);

	reply.type = Rclunk;
	respond(m, &reply);
	putfid(f);
}

/*
 * Handles Tcreate.  The name must be valid and must not
 * already exist in the directory the fid refers to, the
 * fid must not be open, and the user needs write
 * permission on that directory.  The new entry takes its
 * group from the directory and its permissions from
 * m->perm masked by the directory's mode.  For a new
 * directory a super key pointing back at its entry is
 * inserted as well.  On success the fid is moved to the
 * new file and opened with m->mode.
 */
static void
fscreate(Fmsg *m)
{
	char *p, *e, buf[Kvmax], upkbuf[Keymax], upvbuf[Inlmax];
	Dent *de, *dir;
	vlong oldlen;
	Qid old;
	Fcall r;
	Msg mb[2];
	Fid *f;
	Xdir d;
	int nm;

	if(okname(m->name) == -1){
		rerror(m, Ename);
		return;
	}
	if(m->perm & (DMMOUNT|DMAUTH)){
		rerror(m, Ebotch);
		return;
	}
	if((f = getfid(m->conn, m->fid)) == nil){
		rerror(m, Enofid);
		return;
	}
	lock(f);
	if(f->mode != -1){
		rerror(m, Einuse);
		goto Out;
	}
	dir = f->dent;
	if(walk1(f->mnt->root, f->qpath, m->name, &old, &oldlen) == 0){
		rerror(m, Eexist);
		goto Out;
	}

	rlock(dir);
	if(fsaccess(f, dir->mode, dir->uid, dir->gid, DMWRITE) == -1){
		rerror(m, Eperm);
		runlock(dir);
		goto Out;
	}

	/* read everything inherited from the directory under its lock */
	d.gid = dir->gid;
	d.mode = m->perm;
	if(m->perm & DMDIR)
		d.mode &= ~0777 | dir->mode & 0777;
	else
		d.mode &= ~0666 | dir->mode & 0666;
	runlock(dir);

	nm = 0;
	d.qid.type = 0;
	if(m->perm & DMDIR)
		d.qid.type |= QTDIR;
	if(m->perm & DMAPPEND)
		d.qid.type |= QTAPPEND;
	if(m->perm & DMEXCL)
		d.qid.type |= QTEXCL;
	if(m->perm & DMTMP)
		d.qid.type |= QTTMP;
	d.qid.path = aincv(&fs->nextqid, 1);
	d.qid.vers = 0;
	d.name = m->name;
	d.atime = nsec();
	d.mtime = d.atime;
	d.length = 0;
	d.uid = f->uid;
	d.muid = f->uid;

	mb[nm].op = Oinsert;
	if(dir2kv(f->qpath, &d, &mb[nm], buf, sizeof(buf)) == -1){
		rerror(m, Efs);
		goto Out;
	}
	nm++;

	if(m->perm & DMDIR){
		/* super key: new directory's qid path -> its own entry key */
		mb[nm].op = Oinsert;
		if((p = packsuper(upkbuf, sizeof(upkbuf), d.qid.path)) == nil)
			sysfatal("ream: pack super");
		mb[nm].k = upkbuf;
		mb[nm].nk = p - upkbuf;
		if((p = packdkey(upvbuf, sizeof(upvbuf), f->qpath, d.name)) == nil)
			sysfatal("ream: pack super");
		mb[nm].v = upvbuf;
		mb[nm].nv = p - upvbuf;
		nm++;
	}
	if((e = upsert(f->mnt, mb, nm)) != nil){
		rerror(m, e);
		goto Out;
	}

	de = getdent(f->qpath, &d);
	if(de == nil){
		rerror(m, Enomem);
		goto Out;
	}
	f->mode = mode2bits(m->mode);
	f->pqpath = f->qpath;
	f->qpath = d.qid.path;
	f->dent = de;
	/* the fid no longer points at the directory: release that reference */
	clunkdent(dir);

	r.type = Rcreate;
	r.qid = d.qid;
	/* read the fid before giving up our reference to it */
	r.iounit = f->iounit;
	unlock(f);
	respond(m, &r);
	putfid(f);
	return;

Out:
	unlock(f);
	putfid(f);
	return;
}

/*
 * Decides whether the file behind fid f may be removed.
 * A dent whose qid type is exactly QTFILE always may.
 * Otherwise the tree is scanned for keys with the entry's
 * directory prefix, and Enempty is returned if the scan
 * yields one.  Returns nil when removal is allowed, or the
 * error from the scan.
 */
static char*
candelete(Fid *f)
{
	char *err, pfx[Dpfxsz];
	Tree *root;
	Scan scan;

	if(f->dent->qid.type == QTFILE)
		return nil;
	root = agetp(&f->mnt->root);
	packdkey(pfx, sizeof(pfx), f->qpath, nil);
	btnewscan(&scan, pfx, sizeof(pfx));
	err = btenter(root, &scan);
	if(err == nil)
		err = btnext(&scan, &scan.kv);
	if(err == nil && !scan.done)
		err = Enempty;
	btexit(&scan);
	return err;
}

/*
 * Handles Tremove.  The fid is removed from the fid table
 * whether or not the removal succeeds.  The entry must not
 * already be gone, must pass candelete, and the user needs
 * write permission on the containing directory.  The
 * entry's key is then deleted from the tree and the dent
 * marked gone.  For a plain file (qid type QTFILE) an
 * AOclear message covering the whole file is stored
 * through ao.
 *
 * NOTE(review): mb[1] (an Oclobber of the super key) is
 * filled in but only one message is passed to upsert --
 * confirm whether it was meant to be sent for directories.
 */
static void
fsremove(Fmsg *m, Amsg **ao)
{
	char upbuf[Upksz];
	Amsg *a;
	Fcall r;
	Msg mb[2];
	Fid *f;
	char *e;

	if((f = getfid(m->conn, m->fid)) == nil){
		rerror(m, Enofid);
		return;
	}
	clunkfid(m->conn, f);

	a = nil;
	wlock(f->dent);
	if(f->dent->gone){
		e = Ephase;
		goto Error;
	}
	if((e = candelete(f)) != nil)
		goto Error;
	if(fsaccess(f, f->dmode, f->duid, f->dgid, DMWRITE) == -1){
		e = Eperm;
		goto Error;
	}
	mb[0].op = Odelete;
	mb[0].k = f->dent->k;
	mb[0].nk = f->dent->nk;
	mb[0].nv = 0;

	packsuper(upbuf, sizeof(upbuf), f->qpath);
	mb[1].op = Oclobber;
	mb[1].k = upbuf;
	mb[1].nk = Upksz;
	mb[1].nv = 0;

	/*
	 * allocate the async message before touching the tree, so
	 * that running out of memory neither leaves the entry
	 * deleted behind an error reply nor reaches Error with a
	 * nil error string.
	 */
	if(f->dent->qid.type == QTFILE){
		if((a = malloc(sizeof(Amsg))) == nil){
			e = Enomem;
			goto Error;
		}
	}
	if((e = upsert(f->mnt, mb, 1)) != nil)
		goto Error;
	if(a != nil){
		/* the queued clear holds its own mount reference */
		aincl(&f->mnt->ref, 1);
		a->op = AOclear;
		a->mnt = f->mnt;
		a->qpath = f->qpath;
		a->off = 0;
		a->length = f->dent->length;
		a->dent = nil;
		*ao = a;
	}
	f->dent->gone = 1;
	wunlock(f->dent);

	r.type = Rremove;
	respond(m, &r);
	putfid(f);
	return;

Error:
	wunlock(f->dent);
	free(a);
	rerror(m, e);
	putfid(f);
}

/*
 * Handles Topen.  The entry is looked up again in the tree
 * (or synthesised for the dump directory), permission for
 * the requested mode is checked, and the fid, which must
 * not already be open, is marked open.  An exclusive-use
 * file can only be opened while the dent has a single
 * reference.  With OTRUNC the length is set to zero
 * through an Owstat message and an AOclear message for the
 * old contents is stored through ao.
 */
static void
fsopen(Fmsg *m, Amsg **ao)
{
	char *p, *e, buf[Kvmax];
	vlong oldlen;
	int mbits;
	Amsg *a;
	Fcall r;
	Xdir d;
	Fid *f;
	Kvp kv;
	Msg mb;

	mbits = mode2bits(m->mode);
	if((f = getfid(m->conn, m->fid)) == nil){
		rerror(m, Enofid);
		return;
	}

	if((f->qpath & Qdump) != 0){
		filldumpdir(&d);
	}else{
		if((e = lookup(f->mnt, f->dent, &kv, buf, sizeof(buf))) != nil){
			rerror(m, e);
			putfid(f);
			return;
		}
		if(kv2dir(&kv, &d) == -1){
			rerror(m, Efs);
			putfid(f);
			return;
		}
	}
	wlock(f->dent);
	if(f->dent->gone){
		rerror(m, Ephase);
Disallow:	wunlock(f->dent);
		putfid(f);
		return;
	}
	if(f->dent->qid.type & QTEXCL)
	if(f->dent->ref != 1){
		rerror(m, Elocked);
		goto Disallow;
	}
	if(fsaccess(f, d.mode, d.uid, d.gid, mbits) == -1){
		rerror(m, Eperm);
		goto Disallow;
	}
	f->dent->length = d.length;
	wunlock(f->dent);
	r.type = Ropen;
	r.qid = d.qid;
	r.iounit = f->iounit;

	lock(f);
	if(f->mode != -1){
		rerror(m, Einuse);
		unlock(f);
		putfid(f);
		return;
	}
	f->mode = mode2bits(m->mode);
//	if(!fs->rdonly && (m->mode == OEXEC)){
//		lock(&fs->root.lk);
//		f->root = fs->root;
//		unlock(&fs->root.lk);
//	}
	if(m->mode & OTRUNC){
		/* allocate first so that failure leaves the dent untouched */
		if((a = malloc(sizeof(Amsg))) == nil){
			unlock(f);
			rerror(m, Enomem);
			putfid(f);
			return;
		}
		wlock(f->dent);
		/* remember how much there is to clear before zeroing the length */
		oldlen = f->dent->length;
		f->dent->muid = f->uid;
		f->dent->qid.vers++;
		f->dent->length = 0;

		mb.op = Owstat;
		p = buf;
		p[0] = Owsize|Owmuid;	p += 1;
		PACK64(p, 0);		p += 8;
		PACK32(p, f->uid);	p += 4;
		mb.k = f->dent->k;
		mb.nk = f->dent->nk;
		mb.v = buf;
		mb.nv = p - buf;
		if((e = upsert(f->mnt, &mb, 1)) != nil){
			/* release both locks and queue no clear for a failed truncation */
			wunlock(f->dent);
			unlock(f);
			free(a);
			rerror(m, e);
			putfid(f);
			return;
		}
		/* the queued clear holds its own mount reference */
		aincl(&f->mnt->ref, 1);
		a->op = AOclear;
		a->mnt = f->mnt;
		a->qpath = f->qpath;
		a->off = 0;
		a->length = oldlen;
		a->dent = nil;
		*ao = a;
		wunlock(f->dent);
	}
	unlock(f);
	respond(m, &r);
	putfid(f);
}

/*
 * Reads the dump directory: produces one stat entry per
 * key with the Klabel prefix in fs->snap, using the name
 * and qid path stored in that key/value and the fid's own
 * dent for the remaining fields.  Scan state is kept in
 * f->scan between calls and restarted at offset 0.  An
 * entry that does not fit is remembered (overflow) and
 * emitted first by the next call.  Returns nil with
 * r->count set, or an error string.
 *
 * NOTE(review): names are copied through d.name, which
 * still points at the dent's own name storage -- confirm
 * this is intended and large enough.
 */
static char*
readsnap(Fmsg *m, Fid *f, Fcall *r)
{
	char pfx[1], *p, *e;
	int n, ns;
	Scan *s;
	Xdir d;

	s = f->scan;
	if(s != nil && s->offset != 0 && s->offset != m->offset)
		return Edscan;
	if(s == nil || m->offset == 0){
		if((s = mallocz(sizeof(Scan), 1)) == nil)
			return Enomem;
		pfx[0] = Klabel;
		btnewscan(s, pfx, 1);
		lock(f);
		if(f->scan != nil){
			free(f->scan);
		}
		f->scan = s;
		unlock(f);
	}
	if(s->done){
		r->count = 0;
		return nil;
	}
	p = r->data;
	n = m->count;
	d = f->dent->Xdir;
	if(s->overflow){
		/* emit the entry that did not fit last time */
		memcpy(d.name, s->kv.k+1, s->kv.nk-1);
		d.name[s->kv.nk-1] = 0;
		d.qid.path = UNPACK64(s->kv.v + 1);
		if((ns = dir2statbuf(&d, p, n)) == -1){
			r->count = 0;
			return nil;
		}
		s->overflow = 0;
		p += ns;
		n -= ns;
	}
	if((e = btenter(&fs->snap, s)) != nil)
		return e;
	while(1){
		if((e = btnext(s, &s->kv)) != nil){
			/* leave the scan on failure too, as candelete does */
			btexit(s);
			return e;
		}
		if(s->done)
			break;
		memcpy(d.name, s->kv.k+1, s->kv.nk-1);
		d.name[s->kv.nk-1] = 0;
		d.qid.path = UNPACK64(s->kv.v + 1);
		if((ns = dir2statbuf(&d, p, n)) == -1){
			s->overflow = 1;
			break;
		}
		p += ns;
		n -= ns;
	}
	btexit(s);
	r->count = p - r->data;
	return nil;
}

/*
 * Reads a directory: produces one stat entry per key with
 * the directory's prefix in the mount's root tree.  Scan
 * state is kept in f->scan between calls and restarted at
 * offset 0.  An entry that does not fit is remembered
 * (overflow) and emitted first by the next call.  Returns
 * nil with r->count set, or an error string.
 */
static char*
readdir(Fmsg *m, Fid *f, Fcall *r)
{
	char pfx[Dpfxsz], *p, *e;
	int n, ns;
	Tree *t;
	Scan *s;

	s = f->scan;
	t = agetp(&f->mnt->root);
	if(s != nil && s->offset != 0 && s->offset != m->offset)
		return Edscan;
	if(s == nil || m->offset == 0){
		if((s = mallocz(sizeof(Scan), 1)) == nil)
			return Enomem;

		packdkey(pfx, sizeof(pfx), f->qpath, nil);
		btnewscan(s, pfx, sizeof(pfx));
		lock(f);
		if(f->scan != nil)
			free(f->scan);
		f->scan = s;
		unlock(f);
	}
	if(s->done){
		r->count = 0;
		return nil;
	}
	p = r->data;
	n = m->count;
	if(s->overflow){
		/* emit the entry that did not fit last time */
		if((ns = kv2statbuf(&s->kv, p, n)) == -1){
			r->count = 0;
			return nil;
		}
		s->overflow = 0;
		p += ns;
		n -= ns;
	}
	if((e = btenter(t, s)) != nil)
		return e;
	while(1){
		if((e = btnext(s, &s->kv)) != nil){
			/* leave the scan on failure too, as candelete does */
			btexit(s);
			return e;
		}
		if(s->done)
			break;
		if((ns = kv2statbuf(&s->kv, p, n)) == -1){
			s->overflow = 1;
			break;
		}
		p += ns;
		n -= ns;
	}
	btexit(s);
	r->count = p - r->data;
	return nil;
}

/*
 * Serve a Tread on a regular file.
 *
 * The request is clamped to the file length recorded in the dent and
 * then filled by repeated readb calls, accumulating the byte count in
 * r->count.  A read starting past the end of the file returns
 * successfully without touching r->count.
 *
 * The dent is held read-locked for the duration.  Returns nil on
 * success or Efs if readb fails.
 */
static char*
readfile(Fmsg *m, Fid *f, Fcall *r)
{
	vlong got, left, off;
	char *dst;
	Dent *de;

	de = f->dent;
	rlock(de);
	if(m->offset > de->length){
		runlock(de);
		return nil;
	}
	dst = r->data;
	off = m->offset;
	left = m->count;
	/* do not read beyond the end of the file */
	if(m->offset + m->count > de->length)
		left = de->length - m->offset;
	for(; left != 0; dst += got, off += got, left -= got){
		got = readb(f, dst, off, left, de->length);
		if(got == -1){
			fprint(2, "read %K [%Q]@%lld+%lld: %r\n", &de->Key, de->qid, off, left);
			runlock(de);
			return Efs;
		}
		r->count += got;
		/* a zero-length read ends the transfer early */
		if(got == 0)
			break;
	}
	runlock(de);
	return nil;
}

/*
 * Handle a Tread: look up the fid, allocate a reply buffer of the
 * requested size, and dispatch on what the fid refers to (auth file,
 * the Qdump snapshot listing, a directory, or a plain file).
 * Replies with Rread on success or Rerror on failure; the buffer and
 * the fid reference are released in either case.
 */
static void
fsread(Fmsg *m)
{
	char *err;
	Fcall rsp;
	Fid *fid;

	fid = getfid(m->conn, m->fid);
	if(fid == nil){
		rerror(m, Enofid);
		return;
	}
	rsp.type = Rread;
	rsp.count = 0;
	rsp.data = malloc(m->count);
	if(rsp.data == nil){
		rerror(m, Enomem);
		putfid(fid);
		return;
	}

	/* pick the reader that matches the kind of file */
	if(fid->dent->qid.type & QTAUTH)
		err = authread(fid, &rsp, rsp.data, m->count);
	else if(fid->dent->qid.path == Qdump)
		err = readsnap(m, fid, &rsp);
	else if(fid->dent->qid.type & QTDIR)
		err = readdir(m, fid, &rsp);
	else
		err = readfile(m, fid, &rsp);

	if(err == nil)
		respond(m, &rsp);
	else
		rerror(m, err);
	free(rsp.data);
	putfid(fid);
}

/*
 * Handle a Twrite.
 *
 * Writes to auth fids are forwarded to authwrite.  For everything else
 * the data is split into per-block messages via writeb, one Oinsert
 * message per call, followed by a single Owstat message that updates
 * mtime, muid and (if the file grew) the size.  All messages are
 * submitted in one upsert call.
 *
 * The dent is write-locked while the messages are built and submitted.
 * Failures from writeb or upsert abort the process.
 */
static void
fswrite(Fmsg *m)
{
	char sbuf[Wstatmax], kbuf[Max9p/Blksz+2][Offksz], vbuf[Max9p/Blksz+2][Ptrsz];
	Bptr bp[Max9p/Blksz + 2];
	Msg kv[Max9p/Blksz + 2];
	vlong n, o, c, w;
	char *p, *e;
	int i, j;
	Fcall r;
	Tree *t;
	Fid *f;

	if((f = getfid(m->conn, m->fid)) == nil){
		rerror(m, Enofid);
		return;
	}
	if(!(f->mode & DMWRITE)){
		rerror(m, Einuse);
		putfid(f);
		return;
	}
	wlock(f->dent);
	if(f->dent->gone){
		rerror(m, Ephase);
		wunlock(f->dent);
		putfid(f);
		return;
	}
	if(f->dent->qid.type == QTAUTH){
		e = authwrite(f, &r, m->data, m->count);
		if(e != nil)
			rerror(m, e);
		else
			respond(m, &r);
		wunlock(f->dent);
		putfid(f);
		return;
	}

	w = 0;
	p = m->data;
	o = m->offset;
	c = m->count;
	t = agetp(&f->mnt->root);
	/* the last slot of kv is reserved for the wstat message below */
	for(i = 0; i < nelem(kv)-1 && c != 0; i++){
		/* only the first chunk may start at an unaligned offset */
		assert(i == 0 || o%Blksz == 0);
		kv[i].op = Oinsert;
		kv[i].k = kbuf[i];
		kv[i].nk = sizeof(kbuf[i]);
		kv[i].v = vbuf[i];
		kv[i].nv = sizeof(vbuf[i]);
		n = writeb(f, &kv[i], &bp[i], p, o, c, f->dent->length);
		if(n == -1){
			/* release the blocks produced by the iterations that succeeded */
			for(j = 0; j < i; j++)
				freeblk(t, nil, bp[j]);
			wunlock(f->dent);
			fprint(2, "%r");
			putfid(f);
			abort();
			return;
		}
		w += n;
		p += n;
		o += n;
		c -= n;
	}

	/* build the wstat message: one flag byte followed by the packed fields */
	p = sbuf;
	kv[i].op = Owstat;
	kv[i].k = f->dent->k;
	kv[i].nk = f->dent->nk;
	n = m->offset+w;
	*p++ = 0;
	if(n > f->dent->length){
		sbuf[0] |= Owsize;
		PACK64(p, n);
		p += 8;
		/* keep the cached length equal to the size being persisted */
		f->dent->length = n;
	}
	sbuf[0] |= Owmtime;
	f->dent->mtime = nsec();
	PACK64(p, f->dent->mtime);
	p += 8;
	sbuf[0] |= Owmuid;
	PACK32(p, f->uid);
	p += 4;

	kv[i].v = sbuf;
	kv[i].nv = p - sbuf;
	if((e = upsert(f->mnt, kv, i+1)) != nil){
		rerror(m, e);
		wunlock(f->dent);
		putfid(f);
		abort();
		return;
	}
	wunlock(f->dent);

	r.type = Rwrite;
	r.count = w;
	respond(m, &r);
	putfid(f);
}

/*
 * Handle a Tflush by replying with Rflush; no other work is done here.
 */
void
fsflush(Fmsg *m)
{
	Fcall rsp;

	rsp.type = Rflush;
	respond(m, &rsp);
}

/*
 * Allocate a connection for the given read and write descriptors and
 * push it onto the head of the global connection list.
 * Returns the new Conn, or nil if allocation fails.
 */
Conn *
newconn(int rfd, int wfd)
{
	Conn *c;

	if((c = mallocz(sizeof(*c), 1)) == nil)
		return nil;
	c->rfd = rfd;
	c->wfd = wfd;
	c->iounit = Max9p;
	/*
	 * Read and replace the list head under the same lock, so that
	 * concurrent callers cannot both link to the old head and lose
	 * one of the connections.
	 */
	lock(&fs->connlk);
	c->next = fs->conns;
	fs->conns = c;
	unlock(&fs->connlk);
	return c;
}

/*
 * Per-connection message loop.
 *
 * Reads 9P messages from the connection, parses them, and either
 * handles them inline or hands them to the writer channel or one of
 * the reader channels (picked by hashing the fid).  The connection is
 * hung up on a read error, a malformed message, or any message that
 * arrives before Tversion.  The loop ends when readmsg yields no
 * message.
 *
 * Before dispatch, a flush-table lock is taken: exclusively on the
 * slot of the old tag for Tflush, shared on the slot of the message's
 * own tag otherwise.  It is not released in this function.
 * NOTE(review): presumably released when the reply is sent -- confirm.
 */
void
runfs(int id, void *pc)
{
	char err[128];
	RWLock *lk;
	Conn *c;
	Fcall r;
	Fmsg *m;
	u32int h;

	c = pc;
	for(;;){
		if(readmsg(c, &m) < 0){
			fshangup(c, "read message: %r");
			return;
		}
		if(m == nil)
			break;
		if(convM2S(m->buf, m->sz, m) == 0){
			fshangup(c, "invalid message: %r");
			return;
		}
		if(!c->versioned && m->type != Tversion){
			fshangup(c, "version required");
			return;
		}
		dprint("← %F\n", &m->Fcall);

		lk = &fs->flushq[ihash(m->type == Tflush ? m->oldtag : m->tag) % Nflushtab];
		if(m->type == Tflush)
			wlock(lk);
		else
			rlock(lk);

		h = ihash(m->fid) % fs->nreaders;
		switch(m->type){
		/* sync setup, must not access tree */
		case Tversion:
			fsversion(m);
			break;
		case Tauth:
			fsauth(m);
			break;
		case Tclunk:
			fsclunk(m);
			break;
		case Tflush:
			fsflush(m);
			break;

		/* mutators */
		case Tcreate:
		case Twrite:
		case Twstat:
		case Tremove:
			chsend(fs->wrchan, m);
			break;

		/* reads */
		case Tattach:
		case Twalk:
		case Tread:
		case Tstat:
			chsend(fs->rdchan[h], m);
			break;

		/* both: opens with OTRUNC or in OEXEC mode go to the writer */
		case Topen:
			if((m->mode & OTRUNC) || (m->mode & 0xf) == OEXEC)
				chsend(fs->wrchan, m);
			else
				chsend(fs->rdchan[h], m);
			break;

		default:
			fprint(2, "unknown message %F\n", &m->Fcall);
			snprint(err, sizeof(err), "unknown message: %F", &m->Fcall);
			r.type = Rerror;
			r.ename = err;
			respond(m, &r);
			break;
		}
	}
}

/*
 * Writer loop: receives mutating requests from fs->wrchan and runs
 * them one at a time under fs->mutlk, inside an epoch.
 *
 * Requests are refused with Erdonly when the file system is read-only
 * and with Efs when it is marked broken.  A handler may hand back an
 * admin message through its Amsg** argument; if it does, the message
 * is forwarded to fs->admchan after the mutation lock is dropped.
 */
void
runwrite(int id, void *)
{
	char *err;
	Fmsg *m;
	Amsg *a;

	for(;;){
		m = chrecv(fs->wrchan);
		a = nil;

		err = nil;
		if(fs->rdonly)
			err = Erdonly;
		else if(fs->broken)
			err = Efs;
		if(err != nil){
			rerror(m, err);
			continue;
		}

		qlock(&fs->mutlk);
		epochstart(id);
		switch(m->type){
		case Tcreate:
			fscreate(m);
			break;
		case Twrite:
			fswrite(m);
			break;
		case Twstat:
			fswstat(m, &a);
			break;
		case Tremove:
			fsremove(m, &a);
			break;
		case Topen:
			fsopen(m, &a);
			break;
		default:
			/* only mutating message types are expected on this channel */
			abort();
			break;
		}
		epochend(id);
		qunlock(&fs->mutlk);
		epochclean();

		if(a != nil)
			chsend(fs->admchan, a);
	}
}

void
runread(int id, void *ch)
{
	Fmsg *m;

	while(1){
		m = chrecv(ch);
		epochstart(id);
		switch(m->type){
		case Tattach:	fsattach(m);	break;
		case Twalk:	fswalk(m);	break;
		case Tread:	fsread(m);	break;
		case Tstat:	fsstat(m);	break;
		case Topen:	fsopen(m, nil);	break;
		}
		epochend(id);
	}
}

/*
 * Admin loop: receives Amsg requests from fs->admchan and runs them.
 *
 * AOsync:  compresses the allocation log of each arena whose log has
 *          reached the size threshold, updates the snapshot of every
 *          mount and syncs, then frees the blocks of the log chains
 *          that were replaced.  If am->halt is set, the file system is
 *          made read-only before the sync and the process exits
 *          afterwards.
 * AOsnap:  runs snapfs for the request and syncs.
 * AOclear: submits one Oclearb message per block for offsets from
 *          am->off up to am->length, then releases the dent (if any)
 *          and the mount carried by the message.
 *
 * The message is freed after it has been handled.
 */
void
runsweep(int id, void*)
{
	char *e, buf[Offksz];
	Mount *mnt;
	Bptr bp, nb, *oldhd;
	Arena *a;
	Amsg *am;
	vlong off;
	Blk *b;
	Msg m;
	int i;

	if((oldhd = calloc(fs->narena, sizeof(Bptr))) == nil)
		sysfatal("malloc log heads");
	while(1){
		am = chrecv(fs->admchan);
		switch(am->op){
		case AOsync:
			for(i = 0; i < fs->narena; i++){
				a = &fs->arenas[i];
				qlock(a);
				/* log still small: mark as having no old chain to free */
				if(a->nlog < a->reserve/(10*Blksz)){
					oldhd[i] = (Bptr){-1, -1, -1};
					qunlock(a);
					continue;
				}
				/* remember the old log head so its blocks can be freed after the sync */
				oldhd[i] = a->loghd;
				epochstart(id);
				if(compresslog(a) == -1)
					fprint(2, "compress log: %r");
				qunlock(a);
				epochend(id);
				epochclean();
			}
			qlock(&fs->mutlk);
			if(am->halt)
				ainc(&fs->rdonly);
			epochstart(id);
			lock(&fs->mountlk);
			for(mnt = fs->mounts; mnt != nil; mnt = mnt->next)
				updatesnap(&mnt->root, mnt->root, mnt->name);
			unlock(&fs->mountlk);
			sync();
			epochend(id);
			epochclean();
			qunlock(&fs->mutlk);

			/* walk each replaced log chain and free its blocks */
			for(i = 0; i < fs->narena; i++){
				for(bp = oldhd[i]; bp.addr != -1; bp = nb){
					epochstart(id);
					if((b = getblk(bp, 0)) == nil){
						fprint(2, "could not load %B\n", bp);
						fs->broken = 1;
						/* leave the epoch before abandoning this chain */
						epochend(id);
						break;
					}
					nb = b->logp;
					freeblk(nil, b, b->bp);
					dropblk(b);
					epochend(id);
					epochclean();
				}
			}
			if(am->halt){
				postnote(PNGROUP, getpid(), "halted");
				exits(nil);
			}
			break;

		case AOsnap:
			qlock(&fs->mutlk);
			epochstart(id);
			snapfs(am);
			sync();
			epochend(id);
			qunlock(&fs->mutlk);
			break;

		case AOclear:
			/* one upsert per block, taking and dropping the mutation lock each time */
			for(off = am->off; off < am->length; off += Blksz){
				qlock(&fs->mutlk);
				epochstart(id);
				m.k = buf;
				m.nk = sizeof(buf);
				m.op = Oclearb;
				m.k[0] = Kdat;
				PACK64(m.k+1, am->qpath);
				PACK64(m.k+9, off);
				m.v = nil;
				m.nv = 0;
				if((e = upsert(am->mnt, &m, 1)) != nil){
					fprint(2, "sweep: %s\n", e);
					fs->broken++;
				}
				epochend(id);
				qunlock(&fs->mutlk);
				epochclean();
			}
			if(am->dent != nil){
				wunlock(am->dent);
				clunkdent(am->dent);
			}
			clunkmount(am->mnt);
			break;
		}
		free(am);
	}
}

/*
 * Periodic task loop: every 5 seconds, queue a non-halting AOsync
 * request on fs->admchan.
 *
 * If the message cannot be allocated the failure is logged and the
 * sync is attempted again on the next tick, so that one failed
 * allocation does not stop periodic syncing for good.
 */
void
runtasks(int, void *)
{
	Amsg *a;

	while(1){
		sleep(5000);
		a = mallocz(sizeof(Amsg), 1);
		if(a == nil){
			fprint(2, "alloc sync msg: %r\n");
			continue;
		}
		a->op = AOsync;
		a->halt = 0;
		a->fd = -1;
		chsend(fs->admchan, a);
	}
}