shithub: devlimit-patch

Download patch

ref: a9f94671c16558d2155ae9c37cd8c4e4101fef46
author: kitzman <kitzman@disroot.org>
date: Thu Feb 22 02:45:52 EST 2024

added patch and README

--- /dev/null
+++ b/0001-limits-added-devlimit-process-and-memory-limits.patch
@@ -1,0 +1,1234 @@
+From: kitzman <kitzman@disroot.org>
+Date: Sun, 18 Feb 2024 09:36:59 +0000
+Subject: [PATCH] limits: added devlimit, process and memory limits
+
+---
+diff 79a7b4ae59c2e0352b354cf719bc7ef4055f83ae c7a44c1564d2fab0b82b744927ed0f2be468fa1b
+--- /dev/null
++++ b/sys/man/3/limit
+@@ -1,0 +1,136 @@
++.TH LIMIT 3
++.SH NAME
++limit \- system limit interface
++.SH SYNOPSIS
++.nf
++.B bind #Λ /mnt/limit
++
++.B /mnt/limit/ctl
++.B /mnt/limit/clone
++.B /mnt/limit/switch
++.B /mnt/limit/status
++.BI /mnt/limit/groups/ \&...
++.fi
++.SH DESCRIPTION
++The
++.B #Λ
++device provides the interface to the system's limit mechanism.
++Each process has a limit group assigned. Groups are created as
++children of other groups, are represented by an id, and follow the
++restrictions imposed by their parents. A group is deleted the
++moment there are no more processes or segments referring to it.
++The group is
++.IR owned
++by the uname and gid of the creator process, and the attributes
++can be changed. The system's group, with id 1, imposes no
++restrictions and does not increase counters.
++.PP
++The root directory contains the
++.BR ctl ,
++.BR clone ,
++.BR switch ,
++.BR status
++files, and a directory named
++.BR groups .
++The current process' limit group is always presented in the root directory,
++and other groups, in the
++.B groups
++subdirectory, each holding the same files except for
++.BR groups .
++.PP
++The
++.B ctl
++file controls the limit group's attributes, namely, the label, and the limits.
++Reading the file returns the limit group's id. The following commands are
++supported:
++.RS
++.TF "\fLmproc number \fR"
++.PD
++.
++.TP
++.BI "label " label
++Set the group's label; the labels need not be unique.
++.TP
++.BI "mlim " number
++Set the maximum amount of groups that can be
++created from this group.
++.TP
++.BI "mproc " number
++Set the maximum amount of processes that can be
++forked inside this group.
++.TP
++.BI "mpage " number
++Set the maximum total amount of pages that segments
++inside the limit group can have.
++.PD
++.RE
++.PP
++Reading the
++.B status
++file shows the group's attributes: the label (or an empty line
++if not set), the current limit restrictions on the left, and the counts,
++on the right.
++.PP
++To create a new group, the
++.B clone
++file should be opened. Reading from the fid returns the newly created
++group's id. This creates a new reference, and when the fid is clunked,
++the reference is destroyed. A reference is also created for the parent limit group.
++Thus, the opener's process limit group is updated.
++.PP
++To switch to an existing limit group, its
++.B switch
++file should be opened.
++.PP
++The following limits exist and are imposed: limit limits, the maximum number
++of groups which can be "forked" from this group; process limits, the maximum
++number of processes which can exist in the group; and page limits, the maximum
++number of pages its segments can hold. The kernel already limits the number of
++files which can be opened, so implementing that should be redundant.
++.SH EXAMPLES
++To create a new limit group and restrict the amount of sublimits and processes:
++.IP
++.EX
++% <>[10] /mnt/limit/clone
++% echo label mygroup >/mnt/limit/ctl
++% echo mlim 1 >/mnt/limit/ctl
++% echo mproc 20 >/mnt/limit/ctl
++.EE
++.PP
++Snippet to attach to an existing limit group, looking for a specific label:
++.IP
++.EX
++#!/bin/rc
++
++slabel=$1
++shift
++prog=$*
++
++bind '#Λ' /mnt/limit
++
++for (lgrpstat in `{walk -f /mnt/limit/groups | grep 'status$'}) {
++	lgrpdir=`{basename -d $lgrpstat}
++	lgrplabel=`{cat $lgrpstat | sed 1q}
++	if(~ $lgrplabel $slabel) {
++		<>[10] $lgrpdir/switch
++		exec $prog
++	}
++}
++
++echo lgrp $slabel not found
++exit nolgrp
++.EE
++.SH SOURCE
++.B /sys/src/9/port/limit.c
++.br
++.B /sys/src/9/port/devlimit.c
++.SH BUGS
++.PP
++Not all processes use
++.BR pexit ,
++probably the ones which are created during boot. This
++has to be checked.
++.PP
++Spawning a lot of processes inside a limit group which is not
++the root group can cause a kernel panic related
++to runlock. Needs to be investigated.
+--- a/sys/src/9/arm64/mkfile
++++ b/sys/src/9/arm64/mkfile
+@@ -20,6 +20,7 @@
+ 	dev.$O\
+ 	edf.$O\
+ 	fault.$O\
++	limit.$O\
+ 	mul64fract.$O\
+ 	page.$O\
+ 	parse.$O\
+--- a/sys/src/9/bcm/mkfile
++++ b/sys/src/9/bcm/mkfile
+@@ -21,6 +21,7 @@
+ 	dev.$O\
+ 	edf.$O\
+ 	fault.$O\
++	limit.$O\
+ 	mul64fract.$O\
+ 	page.$O\
+ 	parse.$O\
+--- a/sys/src/9/bcm64/mkfile
++++ b/sys/src/9/bcm64/mkfile
+@@ -19,6 +19,7 @@
+ 	dev.$O\
+ 	edf.$O\
+ 	fault.$O\
++	limit.$O\
+ 	mul64fract.$O\
+ 	page.$O\
+ 	parse.$O\
+--- a/sys/src/9/cycv/mkfile
++++ b/sys/src/9/cycv/mkfile
+@@ -20,6 +20,7 @@
+ 	dev.$O\
+ 	edf.$O\
+ 	fault.$O\
++	limit.$O\
+ 	mul64fract.$O\
+ 	rebootcmd.$O\
+ 	page.$O\
+--- a/sys/src/9/imx8/mkfile
++++ b/sys/src/9/imx8/mkfile
+@@ -20,6 +20,7 @@
+ 	dev.$O\
+ 	edf.$O\
+ 	fault.$O\
++	limit.$O\
+ 	mul64fract.$O\
+ 	page.$O\
+ 	parse.$O\
+--- a/sys/src/9/kw/mkfile
++++ b/sys/src/9/kw/mkfile
+@@ -21,6 +21,7 @@
+ 	dev.$O\
+ 	edf.$O\
+ 	fault.$O\
++	limit.$O\
+ 	mul64fract.$O\
+ 	rebootcmd.$O\
+ 	page.$O\
+--- a/sys/src/9/mt7688/mkfile
++++ b/sys/src/9/mt7688/mkfile
+@@ -24,6 +24,7 @@
+ 	dev.$O\
+ 	edf.$O\
+ 	fault.$O\
++	limit.$O\
+ 	mul64fract.$O\
+ 	page.$O\
+ 	parse.$O\
+--- a/sys/src/9/mtx/mkfile
++++ b/sys/src/9/mtx/mkfile
+@@ -18,6 +18,7 @@
+ 	edf.$O\
+ 	fault.$O\
+ 	iomap.$O\
++	limit.$O\
+ 	log.$O\
+ 	mul64fract.$O\
+ 	rebootcmd.$O\
+--- a/sys/src/9/omap/mkfile
++++ b/sys/src/9/omap/mkfile
+@@ -22,6 +22,7 @@
+ 	dev.$O\
+ 	edf.$O\
+ 	fault.$O\
++	limit.$O\
+ 	mul64fract.$O\
+ 	rebootcmd.$O\
+ 	page.$O\
+--- a/sys/src/9/pc/mkfile
++++ b/sys/src/9/pc/mkfile
+@@ -25,6 +25,7 @@
+ 	edf.$O\
+ 	fault.$O\
+ 	iomap.$O\
++	limit.$O\
+ 	memmap.$O\
+ 	page.$O\
+ 	parse.$O\
+--- a/sys/src/9/pc64/mkfile
++++ b/sys/src/9/pc64/mkfile
+@@ -23,6 +23,7 @@
+ 	edf.$O\
+ 	fault.$O\
+ 	iomap.$O\
++	limit.$O\
+ 	memmap.$O\
+ 	page.$O\
+ 	parse.$O\
+--- /dev/null
++++ b/sys/src/9/port/devlimit.c
+@@ -1,0 +1,415 @@
++#include	"u.h"
++#include	"../port/lib.h"
++#include	"mem.h"
++#include	"dat.h"
++#include	"fns.h"
++#include	"../port/error.h"
++
++extern ulong	kerndate;
++
++extern Lgrp *lgrptab[LIMMAX];
++extern Lock lgrptablock;
++
++Lgrp*	getlgrp(int);
++void	switchlgrp(Lgrp*);
++
++/*	filesystem	*/
++enum {
++	LimitQidPos		=	8,	/* LGID()/LGPATH(): the group id sits above this many low bits of qid.path */
++	LimitQidMask	=	0xff,	/* QID(): low bits of qid.path that hold the Q* file index */
++	LabelSize		=	64,	/* limwrite rejects labels longer than LabelSize-1 bytes */
++	StatusSize		=	256,	/* size of the buffer limread formats the status file into */
++};
++
++#define	QID(q)			((int)((q) & LimitQidMask))	/* which file: the Q* index stored in a qid path */
++#define	LGID(q)			((int)((q) >> LimitQidPos))	/* which group: 0 stands for the caller's own group */
++#define	LGPATH(i, q)	(((uvlong)(i) << LimitQidPos) + (uvlong)(q))	/* qid path for file q of group i; arguments parenthesised so expressions expand safely */
++
++enum
++{
++	Qroot,		/* a group's directory; the values double as indices into limitdir[] */
++	Qctl,		/* read: group id; write: label/mlim/mproc/mpage commands */
++	Qclone,		/* opening creates a child group and switches the opener into it */
++	Qswitch,	/* opening switches the opener into this group */
++	Qstatus,	/* read: label line, then one "maximum current" line per limit */
++	Qgroups,	/* directory listing the groups by id */
++};
++
++static Dirtab limitdir[] =
++{
++	".",			{Qroot, 0, QTDIR},				0,	DMDIR|0550,
++	"ctl",			{Qctl},							0,	0640,	/* limgen reports 0440 instead for group 1 */
++	"clone",		{Qclone},						0,	0440,
++	"switch",		{Qswitch},						0,	0440,
++	"status",		{Qstatus},						0,	0440,
++	"groups",		{Qgroups, 0, QTDIR},			0,	DMDIR|0550,	/* limgen hides this entry inside per-group directories */
++};
++
++// ctl commands
++enum {
++	CMlabel,	/* "label": set Lgrp.label */
++	CMsetmlim,	/* "mlim": set Lgrp.mlim */
++	CMsetmproc,	/* "mproc": set Lgrp.mproc */
++	CMsetmpage,	/* "mpage": set Lgrp.mpage */
++};
++
++static
++Cmdtab limitcmd[] = {	/* index, command word, field count (command plus one argument) */
++	CMlabel,	"label",	2,
++	CMsetmlim,	"mlim",		2,
++	CMsetmproc,	"mproc",	2,
++	CMsetmpage,	"mpage",	2,
++};
++
++void
++limdir(Chan *c, Qid qid, char *n, vlong length, char *user, char *group, long perm, Dir *db)	/* fill *db for one entry; strings are referenced, not copied */
++{
++	if(c->flag&CMSG)
++		qid.type |= QTMOUNT;
++	db->qid = qid;
++	db->mode = perm;
++	db->mode |= qid.type << 24;	/* the qid type bits go into the top byte of the mode */
++	db->name = n;
++	db->length = length;
++	db->type = devtab[c->type]->dc;
++	db->dev = c->dev;
++	db->uid = user;
++	db->gid = group;
++	db->muid = user;
++	db->atime = seconds();
++	db->mtime = kerndate;
++}
++
++/* limgen: directory generator for #Λ; fills *dp for `name' (walk) or, when name is nil, for slot s. */
++/* Returns 1 with *dp set, otherwise 0 or -1 (presumably skip/end as in the dev.c gen convention — confirm). */
++static int
++limgen(Chan *c, char *name, Dirtab* tab, int ntab, int s, Dir *dp)
++{
++	Lgrp *l;
++	Qid q;
++	long perm;
++	int i, lgid = LGID(c->qid.path);	/* 0: the qid refers to the caller's own group */
++
++	if(lgid)
++		l = getlgrp(lgid);
++	else
++		l = up->lgrp;
++	if(l == nil)
++		return -1;
++
++	/* device root */
++	if(s == DEVDOTDOT){
++		rlock(l);
++		if(lgid == 0) {
++			c->qid.vers = 1;
++			limdir(c, c->qid, "#Λ", 0, l->uid, l->gid, (long)tab[Qroot].perm, dp);	/* same rune as limitdevtab's device character */
++		} else {
++			limdir(c, tab[Qgroups].qid, tab[Qgroups].name, 0, l->uid, l->gid, (long)tab[Qgroups].perm, dp);
++		}
++		runlock(l);
++		return 1;
++	}
++
++	/* tab is part of every gen due to (i) and (ii) */
++	if(QID(c->qid.path) == Qgroups) goto groupsgen;	// or not?
++	if(name) {
++		if(lgid != 0 && strcmp(name, tab[Qgroups].name) == 0)	/* no nested "groups" inside a group directory */
++			return 0;
++		if(strcmp(name, tab[QID(c->qid.path)].name) == 0)
++			return -1;
++		for(i = 0; i < ntab; i++) {
++			if(strcmp(name, tab[i].name) == 0) {
++				rlock(l);
++				perm = tab[i].perm;
++				if(i == Qctl && (lgid == 1 || (lgid == 0 && up->lgrp->lgid == 1)))	/* group 1's ctl is read-only */
++					perm = 0440;
++				mkqid(&q, LGPATH(lgid, i), 0, tab[i].qid.type);
++				limdir(c, q, name, 0, l->uid, l->gid, perm, dp);
++				runlock(l);
++				return 1;
++			}
++		}
++	} else {
++		if(s < ntab) {
++			if(lgid != 0 && s == Qgroups)
++				return 0;
++			if(QID(c->qid.path) == s)
++				return 0;
++			rlock(l);
++			perm = tab[s].perm;
++			if(s == Qctl && (lgid == 1 || (lgid == 0 && up->lgrp->lgid == 1)))
++				perm = 0440;
++			mkqid(&q, LGPATH(lgid, s), 0, tab[s].qid.type);
++			limdir(c, q, tab[s].name, 0, l->uid, l->gid, perm, dp);
++			runlock(l);
++			return 1;
++		}
++	}
++
++	/* the lgrp dirs are only part of Qgroups (i) and the dirs themselves (ii) */
++groupsgen:
++	if(QID(c->qid.path) == Qgroups && s < ntab)
++		return 0;
++	if(QID(c->qid.path) != Qgroups || (QID(c->qid.path) == Qroot && lgid == 0))
++		return -1;
++	if(name) {
++		i = atoi(name);
++		if(i > LIMMAX || i < 1)
++			return -1;
++		if(l = getlgrp(i)) {
++			rlock(l);
++			mkqid(&q, LGPATH(i, Qroot), 0, QTDIR);
++			limdir(c, q, name, 0, l->uid, l->gid, (long)tab[Qroot].perm, dp);
++			runlock(l);
++			return 1;
++		}
++	} else {
++		i = s - ntab + 1;	/* slots past the fixed files enumerate group ids 1..LIMMAX */
++		if(i > LIMMAX || i < 1)
++			return -1;
++		if(l = getlgrp(i)) {
++			rlock(l);
++			name = up->genbuf;	/* per-process scratch buffer: the old malloc'd name was never freed */
++			snprint(name, sizeof up->genbuf, "%d", i);
++			mkqid(&q, LGPATH(i, Qroot), 0, QTDIR);
++			limdir(c, q, name, 0, l->uid, l->gid, (long)tab[Qroot].perm, dp);
++			runlock(l);
++			return 1;
++		}
++		return 0;	/* unused id: skip the slot and keep enumerating */
++	}
++	return -1;
++}
++
++static Chan*
++limattach(char *spec)	/* attach entry point: delegates to devattach with the device character */
++{
++	return devattach(L'Λ', spec);
++}
++
++static Walkqid*
++limwalk(Chan *c, Chan *nc, char **name, int nname)	/* walk entry point: devwalk resolves each name through limgen */
++{
++	return devwalk(c, nc, name, nname, limitdir, nelem(limitdir), limgen);
++}
++
++static int
++limstat(Chan *c, uchar *db, int n)	/* stat entry point: devstat obtains the entry from limgen */
++{
++	return devstat(c, db, n, limitdir, nelem(limitdir), limgen);
++}
++
++static Chan*
++limopen(Chan *c, int omode)	/* open entry point; opening clone or switch moves the caller into a group */
++{
++	Chan *co;
++	Lgrp *l = up->lgrp;	/* a qid without a group id refers to the caller's own group */
++	int lgid = LGID(c->qid.path);
++
++	if(lgid)
++		l = getlgrp(lgid);
++	if(l == nil)
++		error(Enonexist);
++
++	if(c->qid.type & QTDIR)	/* directories can only be opened for reading */
++		if(omode != OREAD)
++			error(Eperm);
++
++	co = devopen(c, omode, limitdir, nelem(limitdir), limgen);
++
++	switch(QID(c->qid.path)) {
++	case Qclone:
++		l = newlgrp(l);	/* new child of the group this clone file belongs to */
++		switchlgrp(l);	/* the opener joins the new group */
++		break;
++	case Qswitch:
++		switchlgrp(l);	/* the opener joins the existing group */
++		break;
++	}
++
++	return co;
++}
++
++static void
++limclose(Chan *c)	/* close entry point: releases nothing, it only looks the group up */
++{
++	Lgrp *l = up->lgrp;
++	int lgid = LGID(c->qid.path);
++
++	if(lgid)
++		l = getlgrp(lgid);
++	if(l == nil)
++		error(Enonexist);	/* NOTE(review): raising from close once the group is gone looks unintended — confirm */
++}
++
++static void
++limremove(Chan*)	/* remove entry point: nothing in this tree can be removed */
++{
++	error(Eperm);
++}
++
++static long
++limread(Chan *c, void *va, long n, vlong off)	/* read entry point; returns the byte count, raises Enonexist/Eperm */
++{
++	Lgrp *l = up->lgrp;
++	char *buf, *p, *e;
++	long m;
++	int lgid = LGID(c->qid.path);
++
++	if(lgid)
++		l = getlgrp(lgid);
++	if(l == nil)
++		error(Enonexist);
++
++	switch(QID(c->qid.path)){
++	case Qroot:
++		return devdirread(c, va, n, limitdir, nelem(limitdir), limgen);
++	case Qctl:
++		/* ctl reads back as the group's id */
++		rlock(l);
++		m = readnum((ulong) off, va, n, l->lgid, NUMSIZE);
++		runlock(l);
++		return m;
++	case Qclone:
++	case Qswitch:
++		return readstr((ulong) off, va, n, "");
++	case Qstatus:
++		/* Label line, then "maximum current" for groups, processes and pages. */
++		/* seprint appends at p; the old code passed buf as its own %s argument. */
++		buf = smalloc(StatusSize);	/* smalloc does not return nil, unlike the unchecked malloc */
++		e = buf + StatusSize;
++		rlock(l);
++		p = seprint(buf, e, "%s\n", l->label != nil ? l->label : "");
++		p = seprint(p, e, "%*lud %*lud\n", NUMSIZE-1, l->mlim, NUMSIZE-1, l->clim);
++		p = seprint(p, e, "%*lud %*lud\n", NUMSIZE-1, l->mproc, NUMSIZE-1, l->cproc);
++		seprint(p, e, "%*lud %*lud\n", NUMSIZE-1, l->mpage, NUMSIZE-1, l->cpage);
++		runlock(l);
++		m = readstr((ulong) off, va, n, buf);
++		free(buf);
++		return m;
++	case Qgroups:
++		if(lgid != 0)
++			error(Eperm);
++		return devdirread(c, va, n, limitdir, nelem(limitdir), limgen);
++	}
++	error(Eperm);
++	return 0;	/* not reached: error() does not return */
++}
++
++/*
++ * limwrite: handle one command written to a group's ctl file:
++ *	label name	set the group's label (at most LabelSize-1 bytes)
++ *	mlim n		set the maximum number of child groups
++ *	mproc n		set the maximum number of processes
++ *	mpage n		set the maximum number of segment pages
++ * Writes to any other file raise Eperm.  Returns n on success.
++ */
++static long
++limwrite(Chan *c, void *va, long n, vlong)
++{
++	Lgrp *l = up->lgrp;
++	Cmdbuf *cb;
++	Cmdtab *ct;
++	char *arg, *end;
++	ulong v, *field;
++	int lgid = LGID(c->qid.path);
++
++	if(lgid)
++		l = getlgrp(lgid);
++	if(l == nil)
++		error(Enonexist);
++	/* ctl is the only file that accepts writes */
++	if(QID(c->qid.path) != Qctl)
++		error(Eperm);
++
++	cb = parsecmd(va, n);
++	if(waserror()) {
++		free(cb);
++		nexterror();
++	}
++	ct = lookupcmd(cb, limitcmd, nelem(limitcmd));
++	if(ct == nil)
++		error(Ebadctl);
++	arg = cb->f[1];	/* every limitcmd entry asks for 2 fields; presumably enforced by lookupcmd — confirm */
++
++	field = nil;
++	switch(ct->index) {
++	case CMlabel:
++		if(strlen(arg) > LabelSize - 1)
++			error(Eperm);
++		wlock(l);
++		kstrdup(&l->label, arg);
++		wunlock(l);
++		break;
++	case CMsetmlim:
++		field = &l->mlim;
++		break;
++	case CMsetmproc:
++		field = &l->mproc;
++		break;
++	case CMsetmpage:
++		field = &l->mpage;
++		break;
++	default:
++		error(Ebadctl);
++		break;
++	}
++
++	if(field != nil) {
++		/*
++		 * The whole argument must be an unsigned number.  0 is accepted:
++		 * inclimit treats a maximum of 0 as "no limit", and the old
++		 * atoi/!m test made it impossible to restore that state.
++		 */
++		v = strtoul(arg, &end, 0);
++		if(end == arg || *end != '\0' || *arg == '-')
++			error(Ebadctl);
++		wlock(l);
++		*field = v;
++		wunlock(l);
++	}
++	free(cb);
++	poperror();
++	return n;
++}
++
++Dev limitdevtab = {
++	L'Λ',	/* device character, the same one limattach passes to devattach */
++	"limit",
++
++	devreset,
++	devinit,
++	devshutdown,
++	limattach,
++	limwalk,
++	limstat,
++	limopen,
++	devcreate,	/* generic dev.c handler — files cannot be created here, presumably; confirm */
++	limclose,
++	limread,
++	devbread,
++	limwrite,
++	devbwrite,
++	limremove,
++	devwstat,	/* generic dev.c handler — owner/mode are not changeable through wstat, presumably; confirm */
++};
++
++/*	helper functions	*/
++Lgrp*
++getlgrp(int lgid)	/* look a group up by id; nil when the id is out of range or unused */
++{
++	Lgrp *l = nil;
++	lock(&lgrptablock);
++	if(lgid >= 1 && lgid <= LIMMAX) l = lgrptab[lgid - 1];	/* ids are table index + 1 */
++	unlock(&lgrptablock);
++	return l;	/* NOTE(review): no reference is taken, so the group can be closed while the caller uses it */
++}
++
++void
++switchlgrp(Lgrp *l)	/* make l the current process's limit group */
++{
++	Lgrp *o = up->lgrp;
++	incref(l);	/* taken before the old reference is dropped, so switching to the current group cannot free it */
++	up->lgrp = l;
++	closelgrp(o);
++}
+--- /dev/null
++++ b/sys/src/9/port/limit.c
+@@ -1,0 +1,216 @@
++#include	"u.h"
++#include	"../port/lib.h"
++#include	"mem.h"
++#include	"dat.h"
++#include	"fns.h"
++#include	"../port/error.h"
++
++Lgrp *lgrptab[LIMMAX] = { nil };
++Lock lgrptablock;
++
++/*	helper functions	*/
++void
++addchild(Lgrp *parent, Lgrp *new)	/* link new into parent's child table; each child holds one reference on parent */
++{
++	Lgrp **g;
++	uint max;
++	wlock(parent);
++	if(parent->subgrp == nil || parent->subcount >= parent->submax) {
++		max = (parent->subgrp && parent->submax) ? parent->submax * 2 : LIMINISUB;	/* grow by doubling */
++		g = realloc(parent->subgrp, max * sizeof(Lgrp*));	/* sized in pointers; the old code passed an element count as bytes */
++		if(g == nil) { wunlock(parent); error(Enomem); }
++		parent->subgrp = g;
++		parent->submax = max;
++	}
++	incref(parent);	/* only once the slot is certain, so a failed allocation leaks no reference */
++	parent->subgrp[parent->subcount++] = new;
++	wunlock(parent);
++}
++
++void
++removechild(Lgrp *old)	/* unlink old from its parent's child table and drop the reference it held on the parent */
++{
++	Lgrp *parent = old->parent;
++	int i, n;
++
++	if(parent == nil)
++		return;
++	wlock(parent);
++	n = parent->subcount;
++	for(i = 0; i < n && parent->subgrp[i] != old; i++)
++		;
++	if(i == n) {	/* old was never linked: nothing to undo */
++		wunlock(parent);
++		return;
++	}
++	/* close the gap left by old */
++	for(n--; i < n; i++)
++		parent->subgrp[i] = parent->subgrp[i + 1];
++	parent->subcount = n;
++	wunlock(parent);
++	closelgrp(parent);
++}
++
++/*	kernel functions	*/
++Lgrp*
++newlgrp(Lgrp *parent)	/* create a group under parent (nil: no parent); raises an error on any failure */
++{
++	Lgrp *l;
++	int lgid;
++
++	/* Build the group before touching lgrptablock: inclimit, addchild, kstrdup and */
++	/* the RWlocks may sleep or raise an error, which a held spin lock does not allow. */
++	l = malloc(sizeof(Lgrp));	/* relies on malloc returning zeroed memory, as the old code did for label, uid and gid */
++	if(l == nil)
++		error(Enomem);
++	if(waserror()) {
++		/* l->parent is only set once the parent chain has been charged */
++		if(l->parent) {
++			declimit(LTLIM, l->parent, 1);
++			removechild(l);	/* does nothing unless addchild already linked l */
++		}
++		free(l->uid);
++		free(l->gid);
++		free(l);
++		nexterror();
++	}
++	/* charge and link first; the new group starts with its parent's maxima */
++	if(parent) {
++		inclimit(LTLIM, parent, 1);
++		l->parent = parent;
++		rlock(parent);
++		l->mlim = parent->mlim;
++		l->mproc = parent->mproc;
++		l->mpage = parent->mpage;
++		runlock(parent);
++		addchild(parent, l);
++	}
++	if(up->user) {
++		kstrdup(&l->uid, up->user);
++		kstrdup(&l->gid, up->user);
++	} else {
++		kstrdup(&l->uid, eve);
++		kstrdup(&l->gid, eve);
++	}
++
++	/* publish in the first free slot; group ids are slot index + 1 */
++	lock(&lgrptablock);
++	for(lgid = 0; lgid < LIMMAX; lgid++)
++		if(lgrptab[lgid] == nil)
++			break;
++	if(lgid < LIMMAX) {
++		l->lgid = lgid + 1;
++		lgrptab[lgid] = l;
++	}
++	unlock(&lgrptablock);
++	if(lgid == LIMMAX)
++		error("system has reached the maximum amount of limits");
++	poperror();
++	return l;	/* the reference count is still 0; callers take the first reference */
++}
++
++void
++inclimit(int limit, Lgrp *l, int q)	/* charge q units of `limit' to l and each ancestor; raises "limit reached" */
++{
++	Lgrp *c;
++	ulong cval, mval;
++
++	for(c = l; c; c = c->parent) {	/* pass 1: check every group on the chain; nothing is modified yet */
++		if(c->lgid == 1)	/* group 1 neither limits nor counts; the walk stops there */
++			break;
++		rlock(c);
++		switch(limit) {
++		case LTLIM:
++			cval = c->clim;
++			mval = c->mlim;
++			break;
++		case LTPROC:
++			cval = c->cproc;
++			mval = c->mproc;
++			break;
++		case LTPAGE:
++			cval = c->cpage;
++			mval = c->mpage;
++			break;
++		default:
++			runlock(c);
++			error("unknown limit type");
++		}
++		if(mval && cval + q > mval) {	/* a maximum of 0 means unlimited */
++			runlock(c);
++			error("limit reached");
++		}
++		runlock(c);
++	}
++	/*	small amounts over the limit can't hurt	*/
++	for(c = l; c; c = c->parent) {	/* pass 2: apply; the locks were dropped in between, so concurrent callers may overshoot */
++		if(c->lgid == 1)
++			break;
++		wlock(c);
++		switch(limit) {
++		case LTLIM:
++			c->clim += q;
++			break;
++		case LTPROC:
++			c->cproc += q;
++			break;
++		case LTPAGE:
++			c->cpage += q;
++			break;
++		default:
++			wunlock(c);
++			error("unknown limit type");
++		}
++		wunlock(c);
++	}
++}
++
++void
++declimit(int limit, Lgrp *l, int q)	/* subtract q units of `limit' from l and each ancestor; inverse of inclimit */
++{
++	Lgrp *c;
++
++	for(c = l; c; c = c->parent) {	/* a nil l walks zero groups */
++		if(c->lgid == 1)	/* group 1 keeps no counters; the walk stops there */
++			break;
++		wlock(c);
++		switch(limit) {
++		case LTLIM:
++			c->clim -= q;
++			break;
++		case LTPROC:
++			c->cproc -= q;
++			break;
++		case LTPAGE:
++			c->cpage -= q;
++			break;
++		default:
++			wunlock(c);	/* release before raising: error() does not return */
++			error("unknown limit type");
++		}
++		wunlock(c);
++	}
++}
++
++void
++closelgrp(Lgrp* l)	/* drop one reference; the last one unlinks the group and frees it */
++{
++	if(decref(l) != 0)
++		return;
++	/* No waserror: declimit cannot raise for LTLIM, and removechild recurses into */
++	/* closelgrp(parent), so one label per level could overrun the per-process */
++	/* error stack on a deep chain of groups. */
++	wlock(l);
++	declimit(LTLIM, l->parent, 1);	/* walks zero groups when there is no parent */
++	removechild(l);
++	wunlock(l);
++	lock(&lgrptablock);
++	lgrptab[l->lgid - 1] = nil;
++	unlock(&lgrptablock);
++	free(l->label);	/* free(nil) is a no-op */
++	free(l->subgrp);	/* child table allocated by addchild; it used to be leaked */
++	free(l->uid);
++	free(l->gid);
++	free(l);
++	/* NOTE(review): getlgrp can still return l between the decref and the table update */
++}
+--- a/sys/src/9/port/portdat.h
++++ b/sys/src/9/port/portdat.h
+@@ -14,6 +14,7 @@
+ typedef struct Image	Image;
+ typedef struct Log	Log;
+ typedef struct Logflag	Logflag;
++typedef struct Lgrp	Lgrp;
+ typedef struct Mntcache Mntcache;
+ typedef struct Mount	Mount;
+ typedef struct Mntrah	Mntrah;
+@@ -433,6 +434,7 @@
+ 	Pte	*ssegmap[SSEGMAPSIZE];
+ 	Sema	sema;
+ 	ulong	mark;		/* portcountrefs */
++	Lgrp		*lgrp;
+ };
+ 
+ struct Segio
+@@ -538,6 +540,31 @@
+ 	DELTAFD	= 20		/* incremental increase in Fgrp.fd's */
+ };
+ 
++struct Lgrp
++{
++	Ref;	/* taken by processes (rfork), segments (newseg) and child groups (addchild) */
++	RWlock;	/* taken around reads and writes of the fields below */
++	int		lgid;	/* group id; the group lives in slot lgid-1 of lgrptab */
++	ulong	clim, mlim;	/* child groups: current count, maximum (0 = unlimited) */
++	ulong	cproc, mproc;	/* processes: current count, maximum (0 = unlimited) */
++	ulong	cpage, mpage;	/* segment pages: current count, maximum (0 = unlimited) */
++	char*	label;	/* optional label set through ctl; nil when unset */
++	char*	uid;	/* owner reported for the group's files */
++	char*	gid;	/* group reported for the group's files */
++	Lgrp*	parent;	/* nil for a group created without a parent */
++	uint	subcount, submax;	/* used and allocated entries of subgrp */
++	Lgrp**	subgrp;	/* child groups; maintained by addchild/removechild */
++};
++
++enum
++{
++	LIMMAX = 4096,	/* size of lgrptab: the most groups that can exist at once */
++	LIMINISUB = 8,	/* initial value of Lgrp.submax */
++	LTLIM = 0,	/* limit types for inclimit/declimit: child groups */
++	LTPROC,	/* processes */
++	LTPAGE,	/* segment pages */
++};
++
+ struct Palloc
+ {
+ 	Lock;
+@@ -692,6 +719,7 @@
+ 	Egrp 	*egrp;		/* Environment group */
+ 	Fgrp	*fgrp;		/* File descriptor group */
+ 	Rgrp	*rgrp;		/* Rendez group */
++	Lgrp	*lgrp;		/* Limit group */
+ 
+ 	Fgrp	*closingfgrp;	/* used during teardown */
+ 
+--- a/sys/src/9/port/portfns.h
++++ b/sys/src/9/port/portfns.h
+@@ -34,6 +34,7 @@
+ void		ccloseq(Chan*);
+ void		closeegrp(Egrp*);
+ void		closefgrp(Fgrp*);
++void		closelgrp(Lgrp*);
+ void		closepgrp(Pgrp*);
+ void		closergrp(Rgrp*);
+ long		clrfpintr(void);
+@@ -55,6 +56,7 @@
+ void		cupdate(Chan*, uchar*, int, vlong);
+ void		cwrite(Chan*, uchar*, int, vlong);
+ uintptr		dbgpc(Proc*);
++void		declimit(int, Lgrp*, int);
+ long		decref(Ref*);
+ int		decrypt(void*, void*, int);
+ void		delay(int);
+@@ -141,6 +143,7 @@
+ void		iunlock(Lock*);
+ ulong		imagecached(void);
+ ulong		imagereclaim(int);
++void		inclimit(int, Lgrp*, int);
+ long		incref(Ref*);
+ void		init0(void);
+ void		initseg(void);
+@@ -210,6 +213,7 @@
+ int		needpages(void*);
+ Chan*		newchan(void);
+ int		newfd(Chan*, int);
++Lgrp*		newlgrp(Lgrp*);
+ Mhead*		newmhead(Chan*);
+ Mount*		newmount(Chan*, int, char*);
+ Page*		newpage(int, Segment **, uintptr);
+--- a/sys/src/9/port/proc.c
++++ b/sys/src/9/port/proc.c
+@@ -1243,6 +1243,7 @@
+ 	Egrp *egrp;
+ 	Rgrp *rgrp;
+ 	Pgrp *pgrp;
++	Lgrp *lgrp;
+ 	Chan *dot;
+ 	void (*pt)(Proc*, int, vlong);
+ 
+@@ -1262,6 +1263,8 @@
+ 	up->rgrp = nil;
+ 	pgrp = up->pgrp;
+ 	up->pgrp = nil;
++	lgrp = up->lgrp;
++	up->lgrp = nil;
+ 	dot = up->dot;
+ 	up->dot = nil;
+ 	qunlock(&up->debug);
+@@ -1276,6 +1279,10 @@
+ 		cclose(dot);
+ 	if(pgrp != nil)
+ 		closepgrp(pgrp);
++	if(lgrp != nil) {
++		declimit(LTPROC, lgrp, 1);
++		closelgrp(lgrp);
++	}
+ 
+ 	if(up->parentpid == 0){
+ 		if(exitstr == nil)
+--- a/sys/src/9/port/segment.c
++++ b/sys/src/9/port/segment.c
+@@ -56,6 +56,10 @@
+ 	s = malloc(sizeof(Segment));
+ 	if(s == nil)
+ 		error(Enomem);
++	if(waserror()) {
++		if(s) free(s);
++		nexterror();
++	}
+ 	s->ref = 1;
+ 	s->type = type;
+ 	s->base = base;
+@@ -70,13 +74,17 @@
+ 		return s;
+ 	}
+ 
++	if(up && up->lgrp) s->lgrp = up->lgrp;
++	if(s->lgrp != nil) {
++		inclimit(LTPAGE, s->lgrp, size);
++		incref(s->lgrp);
++	}
++
+ 	mapsize = ROUND(size, PTEPERTAB)/PTEPERTAB;
+ 	if(mapsize > nelem(s->ssegmap)){
+ 		s->map = malloc(mapsize*sizeof(Pte*));
+-		if(s->map == nil){
+-			free(s);
++		if(s->map == nil)
+ 			error(Enomem);
+-		}
+ 		s->mapsize = mapsize;
+ 	}
+ 	else{
+@@ -84,6 +92,7 @@
+ 		s->mapsize = nelem(s->ssegmap);
+ 	}
+ 
++	poperror();
+ 	return s;
+ }
+ 
+@@ -122,6 +131,11 @@
+ 			free(s->map);
+ 	}
+ 
++	if(s->lgrp != nil) {
++		declimit(LTPAGE, s->lgrp, s->size);
++		closelgrp(s->lgrp);
++	}
++
+ 	if(s->profile != nil)
+ 		free(s->profile);
+ 
+@@ -409,13 +423,15 @@
+ 		return s->base;
+ 
+ 	qlock(s);
++	if(waserror()) {
++		qunlock(s);
++		nexterror();
++	}
+ 
+ 	/* We may start with the bss overlapping the data */
+ 	if(addr < s->base) {
+-		if(seg != BSEG || up->seg[DSEG] == nil || addr < up->seg[DSEG]->base) {
+-			qunlock(s);
++		if(seg != BSEG || up->seg[DSEG] == nil || addr < up->seg[DSEG]->base)
+ 			error(Enovmem);
+-		}
+ 		addr = s->base;
+ 	}
+ 
+@@ -427,13 +443,13 @@
+ 		 * to-be-freed address space may have been passed to the kernel
+ 		 * already by another proc and is past the validaddr stage.
+ 		 */
+-		if(s->ref > 1){
+-			qunlock(s);
++		if(s->ref > 1)
+ 			error(Einuse);
+-		}
+ 		mfreeseg(s, newtop, (s->top-newtop)/BY2PG);
++		if(s->lgrp) declimit(LTPAGE, s->lgrp, s->size - newsize);	/* must run before s->size is overwritten, else it subtracts 0 */
++		poperror();	/* this path returns early: pop the label pushed after qlock(s) */
+ 		s->top = newtop;
+ 		s->size = newsize;
+ 		qunlock(s);
+ 		flushmmu();
+ 		return 0;
+@@ -443,33 +459,39 @@
+ 		ns = up->seg[i];
+ 		if(ns == nil || ns == s)
+ 			continue;
+-		if(newtop > ns->base && s->base < ns->top) {
+-			qunlock(s);
++		if(newtop > ns->base && s->base < ns->top)
+ 			error(Esoverlap);
+-		}
+ 	}
+ 
+-	if(newsize > (SEGMAPSIZE*PTEPERTAB)) {
+-		qunlock(s);
++	if(newsize > (SEGMAPSIZE*PTEPERTAB))
+ 		error(Enovmem);
+-	}
++
+ 	mapsize = ROUND(newsize, PTEPERTAB)/PTEPERTAB;
+ 	if(mapsize > s->mapsize){
+ 		map = malloc(mapsize*sizeof(Pte*));
+-		if(map == nil){
+-			qunlock(s);
++		if(map == nil)
+ 			error(Enomem);
++		if(waserror()) {
++			free(map);
++			nexterror();
+ 		}
++		if(s->lgrp)
++			inclimit(LTPAGE, s->lgrp, newsize - s->size);
+ 		memmove(map, s->map, s->mapsize*sizeof(Pte*));
+ 		if(s->map != s->ssegmap)
+ 			free(s->map);
+ 		s->map = map;
+ 		s->mapsize = mapsize;
++		poperror();
++	} else {
++		if(s->lgrp)
++			inclimit(LTPAGE, s->lgrp, newsize - s->size);
+ 	}
+ 
+ 	s->top = newtop;
+ 	s->size = newsize;
+ 	qunlock(s);
++	poperror();
+ 	return 0;
+ }
+ 
+--- a/sys/src/9/port/sysproc.c
++++ b/sys/src/9/port/sysproc.c
+@@ -96,6 +96,8 @@
+ 	if((p = newproc()) == nil)
+ 		error("no procs");
+ 
++	inclimit(LTPROC, up->lgrp, 1);
++
+ 	qlock(&up->debug);
+ 	qlock(&p->debug);
+ 
+@@ -211,6 +213,10 @@
+ 		p->egrp = up->egrp;
+ 		incref(p->egrp);
+ 	}
++
++	/* Limit group */
++	p->lgrp = up->lgrp;
++	incref(p->lgrp);
+ 
+ 	procfork(p);
+ 
+--- a/sys/src/9/port/userinit.c
++++ b/sys/src/9/port/userinit.c
+@@ -33,6 +33,7 @@
+ 	up->egrp->ref = 1;
+ 	up->fgrp = dupfgrp(nil);
+ 	up->rgrp = newrgrp();
++	up->lgrp = newlgrp(nil);
+ 
+ 	/*
+ 	 * These are o.k. because rootinit is null.
+--- a/sys/src/9/ppc/mkfile
++++ b/sys/src/9/ppc/mkfile
+@@ -19,6 +19,7 @@
+ 	dev.$O\
+ 	edf.$O\
+ 	fault.$O\
++	limit.$O\
+ 	log.$O\
+ 	rebootcmd.$O\
+ 	page.$O\
+--- a/sys/src/9/sgi/mkfile
++++ b/sys/src/9/sgi/mkfile
+@@ -26,6 +26,7 @@
+ 	edf.$O\
+ 	fault.$O\
+ 	fptrap.$O\
++	limit.$O\
+ 	mul64fract.$O\
+ 	page.$O\
+ 	parse.$O\
+--- a/sys/src/9/teg2/mkfile
++++ b/sys/src/9/teg2/mkfile
+@@ -23,6 +23,7 @@
+ 	dev.$O\
+ 	edf.$O\
+ 	fault.$O\
++	limit.$O\
+ 	mul64fract.$O\
+ 	rebootcmd.$O\
+ 	page.$O\
+--- a/sys/src/9/xen/mkfile
++++ b/sys/src/9/xen/mkfile
+@@ -23,6 +23,7 @@
+ 	edf.$O\
+ 	fault.$O\
+ 	iomap.$O\
++	limit.$O\
+ 	page.$O\
+ 	parse.$O\
+ 	pgrp.$O\
+--- a/sys/src/9/zynq/mkfile
++++ b/sys/src/9/zynq/mkfile
+@@ -20,6 +20,7 @@
+ 	dev.$O\
+ 	edf.$O\
+ 	fault.$O\
++	limit.$O\
+ 	mul64fract.$O\
+ 	rebootcmd.$O\
+ 	page.$O\
--- /dev/null
+++ b/README
@@ -1,0 +1,121 @@
+
+
+
+     LIMIT(3)                                                 LIMIT(3)
+
+
+
+
+
+     NAME
+          limit - system limit interface
+
+     SYNOPSIS
+          bind #Λ /mnt/limit
+
+          /mnt/limit/ctl
+          /mnt/limit/clone
+          /mnt/limit/switch
+          /mnt/limit/status
+          /mnt/limit/groups/...
+
+     DESCRIPTION
+          The #Λ device provides the interface to the system's limit
+          mechanism.  Each process has a limit group assigned. Groups
+          are created as children of other groups, are represented by
+          an id, and follow the restrictions imposed by their parents.
+          A group is deleted the moment there are no more processes or
+          segments referring to it.  The group is owned by the uname
+          and gid of the creator process, and the attributes can be
+          changed. The system's group, with id 1, imposes no restric-
+          tions and does not increase counters.
+
+          The root directory contains the ctl, clone, switch, status
+          files, and a directory named groups.  The current process'
+          limit group is always presented in the root directory, and
+          other groups, in the groups subdirectory, each holding the
+          same files except for groups.
+
+          The ctl file controls the limit group's attributes, namely,
+          the label, and the limits.  Reading the file returns the
+          limit group's id. The following commands are supported:
+
+
+               label label    Set the group's label; the labels need
+                              not be unique.
+
+               mlim number    Set the maximum amount of groups that
+                              can be created from this group.
+
+               mproc number   Set the maximum amount of processes that
+                              can be forked inside this group.
+
+               mpage number   Set the maximum total amount of pages
+                              that segments inside the limit group can
+                              have.
+
+          Reading the status file shows the group's attributes: the
+          label (or an empty line if not set), followed by one line
+          per limit with the current restriction on the left and the
+          count on the right.
+
+          To create a new group, the clone file should be opened.
+          Reading from the fid returns the newly created group's id.
+          This creates a new reference, and when the fid is clunked,
+          the reference is destroyed. A reference is also created for
+          the parent limit group.  Thus, the opener's process limit
+          group is updated.
+
+          To switch to an existing limit group, the switch should be
+          opened.
+
+          The following limits exist and are imposed: limit limits,
+          the maximum number of groups which can be "forked" from
+          this group; process limits, the maximum number of processes
+          which can exist in the group; and page limits, the maximum
+          number of pages its segments can hold. The kernel already
+          limits open files, so a limit on those would be redundant.
+
+     EXAMPLES
+          To create a new limit group and restrict the amount of sub-
+          limits and processes:
+
+               % <>[10] /mnt/limit/clone
+               % echo label mygroup >/mnt/limit/ctl
+               % echo mlim 1 >/mnt/limit/ctl
+               % echo mproc 20 >/mnt/limit/ctl
+
+          Snippet to attach to an existing limit group, looking for a
+          specific label:
+
+               #!/bin/rc
+
+               slabel=$1
+               shift
+               prog=$*
+
+               bind '#Λ' /mnt/limit
+
+               for (lgrpstat in `{walk -f /mnt/limit/groups | grep 'status$'}) {
+                    lgrpdir=`{basename -d $lgrpstat}
+                    lgrplabel=`{cat $lgrpstat | sed 1q}
+                    if(~ $lgrplabel $slabel) {
+                         <>[10] $lgrpdir/switch
+                         exec $prog
+                    }
+               }
+
+               echo lgrp $slabel not found
+               exit nolgrp
+
+     SOURCE
+          /sys/src/9/port/limit.c
+          /sys/src/9/port/devlimit.c
+
+     BUGS
+          Not all processes use pexit, probably the ones which are
+          created during boot. This has to be checked.
+
+          Spawning a lot of processes inside a limit group which is
+          not the root group can cause a kernel panic related to run-
+          lock. Needs to be investigated.