ref: a9f94671c16558d2155ae9c37cd8c4e4101fef46
author: kitzman <kitzman@disroot.org>
date: Thu Feb 22 02:45:52 EST 2024
added patch and README
--- /dev/null
+++ b/0001-limits-added-devlimit-process-and-memory-limits.patch
@@ -1,0 +1,1234 @@
+From: kitzman <kitzman@disroot.org>
+Date: Sun, 18 Feb 2024 09:36:59 +0000
+Subject: [PATCH] limits: added devlimit, process and memory limits
+
+---
+diff 79a7b4ae59c2e0352b354cf719bc7ef4055f83ae c7a44c1564d2fab0b82b744927ed0f2be468fa1b
+--- /dev/null
++++ b/sys/man/3/limit
+@@ -1,0 +1,136 @@
++.TH LIMIT 3
++.SH NAME
++limit \- system limit interface
++.SH SYNOPSIS
++.nf
++.B bind #Λ /mnt/limit
++
++.B /mnt/limit/ctl
++.B /mnt/limit/clone
++.B /mnt/limit/switch
++.B /mnt/limit/status
++.BI /mnt/limit/groups/ \&...
++.fi
++.SH DESCRIPTION
++The
++.B #Λ
++device provides the interface to the system's limit mechanism.
++Each process has a limit group assigned. Groups are created as
++children of other groups, are represented by an id, and follow the
++restrictions imposed by their parents. A group is deleted the
++moment there are no more processes or segments referring to it.
++The group is
++.IR owned
++by the uname and gid of the creator process, and the attributes
++can be changed. The system's group, with id 1, imposes no
++restrictions and does not increase counters.
++.PP
++The root directory contains the
++.BR ctl ,
++.BR clone ,
++.BR switch ,
++.BR status
++files, and a directory named
++.BR groups .
++The current process' limit group is always presented in the root directory,
++and other groups, in the
++.B groups
++subdirectory, where they have the same files, except for the
++.BR groups " directory."
++.PP
++The
++.B ctl
++file controls the limit group's attributes, namely, the label, and the limits.
++Reading the file returns the limit group's id. The following commands are
++supported:
++.RS
++.TF "\fLmproc number \fR"
++.PD
++.
++.TP
++.BI "label " label
++Set the group's label; the labels need not be unique.
++.TP
++.BI "mlim " number
++Set the maximum amount of groups that can be
++created from this group.
++.TP
++.BI "mproc " number
++Set the maximum amount of processes that can be
++forked inside this group.
++.TP
++.BI "mpage " number
++Set the maximum total amount of pages that segments
++inside the limit group can have.
++.PD
++.RE
++.PP
++Reading the
++.B status
++file shows the group's attributes: the label (or an empty line
++if not set), the current limit restrictions on the left, and the counts,
++on the right.
++.PP
++To create a new group, the
++.B clone
++file should be opened. Reading from the fid returns the newly created
++group's id. This creates a new reference, and when the fid is clunked,
++the reference is destroyed. A reference is also created for the parent limit group.
++The opening process is also switched into the newly created group.
++.PP
++To switch to an existing limit group, the
++.B switch
++file in that group's directory should be opened.
++.PP
++The following limits exist and are imposed: limit limits, the maximum
++number of groups which can be ``forked'' from this group; process limits,
++the maximum number of processes which can exist in the group; and page
++limits, the maximum number of pages its segments can have. The kernel already
++imposes limits on the number of files which can be opened, so implementing that should be redundant.
++.SH EXAMPLES
++To create a new limit group and restrict the amount of sublimits and processes:
++.IP
++.EX
++% <>[10] /mnt/limit/clone
++% echo label mygroup >/mnt/limit/ctl
++% echo mlim 1 >/mnt/limit/ctl
++% echo mproc 20 >/mnt/limit/ctl
++.EE
++.PP
++Snippet to attach to an existing limit group, looking for a specific label:
++.IP
++.EX
++#!/bin/rc
++
++slabel=$1
++shift
++prog=$*
++
++bind '#Λ' /mnt/limit
++
++for (lgrpstat in `{walk -f /mnt/limit/groups | grep 'status$'}) {
++ lgrpdir=`{basename -d $lgrpstat}
++ lgrplabel=`{cat $lgrpstat | sed 1q}
++ if(~ $lgrplabel $slabel) {
++ <>[10] $lgrpdir/switch
++ exec $prog
++ }
++}
++
++echo lgrp $slabel not found
++exit nolgrp
++.EE
++.SH SOURCE
++.B /sys/src/9/port/limit.c
++.br
++.B /sys/src/9/port/devlimit.c
++.SH BUGS
++.PP
++Not all processes use
++.BR pexit ,
++probably the ones which are created during boot. This
++has to be checked.
++.PP
++Spawning a lot of processes inside a limit group which is not
++the root group can cause a kernel panic related
++to runlock. Needs to be investigated.
+--- a/sys/src/9/arm64/mkfile
++++ b/sys/src/9/arm64/mkfile
+@@ -20,6 +20,7 @@
+ dev.$O\
+ edf.$O\
+ fault.$O\
++ limit.$O\
+ mul64fract.$O\
+ page.$O\
+ parse.$O\
+--- a/sys/src/9/bcm/mkfile
++++ b/sys/src/9/bcm/mkfile
+@@ -21,6 +21,7 @@
+ dev.$O\
+ edf.$O\
+ fault.$O\
++ limit.$O\
+ mul64fract.$O\
+ page.$O\
+ parse.$O\
+--- a/sys/src/9/bcm64/mkfile
++++ b/sys/src/9/bcm64/mkfile
+@@ -19,6 +19,7 @@
+ dev.$O\
+ edf.$O\
+ fault.$O\
++ limit.$O\
+ mul64fract.$O\
+ page.$O\
+ parse.$O\
+--- a/sys/src/9/cycv/mkfile
++++ b/sys/src/9/cycv/mkfile
+@@ -20,6 +20,7 @@
+ dev.$O\
+ edf.$O\
+ fault.$O\
++ limit.$O\
+ mul64fract.$O\
+ rebootcmd.$O\
+ page.$O\
+--- a/sys/src/9/imx8/mkfile
++++ b/sys/src/9/imx8/mkfile
+@@ -20,6 +20,7 @@
+ dev.$O\
+ edf.$O\
+ fault.$O\
++ limit.$O\
+ mul64fract.$O\
+ page.$O\
+ parse.$O\
+--- a/sys/src/9/kw/mkfile
++++ b/sys/src/9/kw/mkfile
+@@ -21,6 +21,7 @@
+ dev.$O\
+ edf.$O\
+ fault.$O\
++ limit.$O\
+ mul64fract.$O\
+ rebootcmd.$O\
+ page.$O\
+--- a/sys/src/9/mt7688/mkfile
++++ b/sys/src/9/mt7688/mkfile
+@@ -24,6 +24,7 @@
+ dev.$O\
+ edf.$O\
+ fault.$O\
++ limit.$O\
+ mul64fract.$O\
+ page.$O\
+ parse.$O\
+--- a/sys/src/9/mtx/mkfile
++++ b/sys/src/9/mtx/mkfile
+@@ -18,6 +18,7 @@
+ edf.$O\
+ fault.$O\
+ iomap.$O\
++ limit.$O\
+ log.$O\
+ mul64fract.$O\
+ rebootcmd.$O\
+--- a/sys/src/9/omap/mkfile
++++ b/sys/src/9/omap/mkfile
+@@ -22,6 +22,7 @@
+ dev.$O\
+ edf.$O\
+ fault.$O\
++ limit.$O\
+ mul64fract.$O\
+ rebootcmd.$O\
+ page.$O\
+--- a/sys/src/9/pc/mkfile
++++ b/sys/src/9/pc/mkfile
+@@ -25,6 +25,7 @@
+ edf.$O\
+ fault.$O\
+ iomap.$O\
++ limit.$O\
+ memmap.$O\
+ page.$O\
+ parse.$O\
+--- a/sys/src/9/pc64/mkfile
++++ b/sys/src/9/pc64/mkfile
+@@ -23,6 +23,7 @@
+ edf.$O\
+ fault.$O\
+ iomap.$O\
++ limit.$O\
+ memmap.$O\
+ page.$O\
+ parse.$O\
+--- /dev/null
++++ b/sys/src/9/port/devlimit.c
+@@ -1,0 +1,415 @@
++#include "u.h"
++#include "../port/lib.h"
++#include "mem.h"
++#include "dat.h"
++#include "fns.h"
++#include "../port/error.h"
++
++extern ulong kerndate;
++
++extern Lgrp *lgrptab[LIMMAX];
++extern Lock lgrptablock;
++
++Lgrp* getlgrp(int);
++void switchlgrp(Lgrp*);
++
++/* filesystem */
++enum {
++	LimitQidPos = 8,	/* shift of the group id inside qid.path (used by LGID and LGPATH) */
++	LimitQidMask = 0xff,	/* mask selecting the file kind inside qid.path (used by QID) */
++	LabelSize = 64,	/* limwrite rejects labels longer than LabelSize-1 bytes */
++	StatusSize = 256,	/* size of the buffer limread builds the status text in */
++};
++
++#define QID(q) ((int)((q) & LimitQidMask))	/* file kind (Qroot, Qctl, ...): low bits of qid.path; argument parenthesized so expressions expand safely */
++#define LGID(q) ((int)((q) >> LimitQidPos))	/* group id stored above the file kind; 0 selects the caller's own group */
++#define LGPATH(i, q) (((uvlong)(i) << LimitQidPos) + (uvlong)(q))	/* pack group id i and file kind q into a qid.path */
++
++enum
++{
++	Qroot,	/* a group directory; the Q* values also index limitdir[] */
++	Qctl,	/* read: group id; write: label/mlim/mproc/mpage commands */
++	Qclone,	/* open: create a child group and switch the opener into it */
++	Qswitch,	/* open: switch the opener into this group */
++	Qstatus,	/* read: label line followed by one "maximum current" line per limit */
++	Qgroups,	/* directory of all groups, named by id */
++};
++
++static Dirtab limitdir[] =	/* template entries; limgen rewrites the qid path per group and the ctl mode for group 1 */
++{
++	".", {Qroot, 0, QTDIR}, 0, DMDIR|0550,
++	"ctl", {Qctl}, 0, 0640,
++	"clone", {Qclone}, 0, 0440,
++	"switch", {Qswitch}, 0, 0440,
++	"status", {Qstatus}, 0, 0440,
++	"groups", {Qgroups, 0, QTDIR}, 0, DMDIR|0550,
++};
++
++// ctl commands; the enum values are the indices limwrite switches on
++enum {
++	CMlabel,	/* "label" */
++	CMsetmlim,	/* "mlim" */
++	CMsetmproc,	/* "mproc" */
++	CMsetmpage,	/* "mpage" */
++};
++
++static
++Cmdtab limitcmd[] = {	/* index, command word, and presumably the required field count (command plus one argument) */
++	CMlabel, "label", 2,
++	CMsetmlim, "mlim", 2,
++	CMsetmproc, "mproc", 2,
++	CMsetmpage, "mpage", 2,
++};
++
++void	/* fill in *db for one directory entry, with the owner and group supplied by the caller */
++limdir(Chan *c, Qid qid, char *n, vlong length, char *user, char *group, long perm, Dir *db)
++{
++	if((c->flag & CMSG) != 0)
++		qid.type |= QTMOUNT;	/* carry the channel's CMSG flag over as QTMOUNT */
++	db->qid = qid;
++	db->mode = perm;
++	db->mode |= qid.type << 24;	/* the qid type bits go into the top byte of the mode */
++	db->name = n;
++	db->uid = user;
++	db->gid = group;
++	db->muid = user;
++	db->length = length;
++	db->type = devtab[c->type]->dc;
++	db->dev = c->dev;
++	db->atime = seconds();
++	db->mtime = kerndate;
++}
++
++static int	/* returns 1 after filling *dp, otherwise 0 or -1 (presumably the usual gen convention: 0 = skip slot, -1 = no entry) */
++limgen(Chan *c, char *name, Dirtab* tab, int ntab, int s, Dir *dp)
++{
++	Lgrp *l;
++	Qid q;
++	long perm;
++	/* Directory generator handed to devwalk, devstat, devopen and devdirread. */
++	int lgid = LGID(c->qid.path);
++	int i;
++	/* lgid 0 means "the caller's own group"; otherwise look the group up by id */
++	if(lgid)
++		l = getlgrp(lgid);
++	else
++		l = up->lgrp;
++	if(l == nil)
++		return -1;
++
++	/* device root */
++	if(s == DEVDOTDOT){
++		rlock(l);
++		if(lgid == 0) {
++			c->qid.vers = 1;
++			limdir(c, c->qid, "#λ", 0, l->uid, l->gid, (long)tab[Qroot].perm, dp);
++		} else {
++			limdir(c, tab[Qgroups].qid, tab[Qgroups].name, 0, l->uid, l->gid, (long)tab[Qgroups].perm, dp);
++		}
++		runlock(l);
++		return 1;
++	}
++
++	/* tab is part of every gen due to (i) and (ii) */
++	if(QID(c->qid.path) == Qgroups) goto groupsgen; // or not?
++	if(name) {
++		if(lgid != 0 && strcmp(name, tab[Qgroups].name) == 0)
++			return 0;	/* only the root directory has a groups subdirectory */
++		if(strcmp(name, tab[QID(c->qid.path)].name) == 0)
++			return -1;
++		for(i = 0; i < ntab; i++) {
++			if(strcmp(name, tab[i].name) == 0) {
++				rlock(l);
++				perm = tab[i].perm;
++				if(i == Qctl && (lgid == 1 || (lgid == 0 && up->lgrp->lgid == 1)))
++					perm = 0440;	/* ctl of group 1 is presented read-only */
++				mkqid(&q, LGPATH(lgid, i), 0, tab[i].qid.type);
++				limdir(c, q, name, 0, l->uid, l->gid, perm, dp);
++				runlock(l);
++				return 1;
++			}
++		}
++	} else {
++		if(s < ntab) {
++			if(lgid != 0 && s == Qgroups)
++				return 0;
++			if(QID(c->qid.path) == s)
++				return 0;
++			rlock(l);
++			perm = tab[s].perm;
++			if(s == Qctl && (lgid == 1 || (lgid == 0 && up->lgrp->lgid == 1)))
++				perm = 0440;	/* ctl of group 1 is presented read-only */
++			mkqid(&q, LGPATH(lgid, s), 0, tab[s].qid.type);
++			limdir(c, q, tab[s].name, 0, l->uid, l->gid, perm, dp);
++			runlock(l);
++			return 1;
++		}
++	}
++
++	/* the lgrp dirs are only part of Qgroups (i) and the dirs themselves (ii) */
++groupsgen:
++	if(QID(c->qid.path) == Qgroups && s < ntab)
++		return 0;
++	if(QID(c->qid.path) != Qgroups || (QID(c->qid.path) == Qroot && lgid == 0))
++		return -1;
++	if(name) {
++		i = atoi(name);	/* group directories are named by decimal id */
++		if(i > LIMMAX || i < 1)
++			return -1;
++		if(l = getlgrp(i)) {
++			rlock(l);
++			mkqid(&q, LGPATH(i, Qroot), 0, QTDIR);
++			limdir(c, q, name, 0, l->uid, l->gid, (long)tab[Qroot].perm, dp);
++			runlock(l);
++			return 1;
++		}
++	} else {
++		i = s - ntab + 1;	/* slots past the fixed table map onto ids 1..LIMMAX */
++		if(i > LIMMAX || i < 1)
++			return -1;
++		if(l = getlgrp(i)) {
++			rlock(l);
++			name = up->genbuf;	/* per-process scratch buffer; the malloc(NUMSIZE) used here before was never freed */
++			snprint(name, sizeof up->genbuf, "%d", i);
++			mkqid(&q, LGPATH(i, Qroot), 0, QTDIR);
++			limdir(c, q, name, 0, l->uid, l->gid, (long)tab[Qroot].perm, dp);
++			runlock(l);
++			return 1;
++		}
++		return 0;	/* unused id: skip the slot and keep enumerating */
++	}
++	return -1;
++}
++
++static Chan*
++limattach(char *spec)
++{
++	return devattach(L'Λ', spec);	/* attach through devattach under the device character Λ */
++}
++
++static Walkqid*
++limwalk(Chan *c, Chan *nc, char **name, int nname)
++{
++	return devwalk(c, nc, name, nname, limitdir, nelem(limitdir), limgen);	/* name resolution is done entirely by limgen */
++}
++
++static int
++limstat(Chan *c, uchar *db, int n)
++{
++	return devstat(c, db, n, limitdir, nelem(limitdir), limgen);	/* Dir contents come from limgen/limdir */
++}
++
++static Chan*	/* open a file of the device; opening clone or switch changes the caller's limit group as a side effect */
++limopen(Chan *c, int omode)
++{
++	Chan *co;
++	Lgrp *l = up->lgrp;	/* default target: the caller's own group (group id 0 in the qid) */
++	int lgid = LGID(c->qid.path);
++
++	if(lgid)
++		l = getlgrp(lgid);	/* a group reached through groups/; no reference is taken on it here */
++	if(l == nil)
++		error(Enonexist);
++
++	if(c->qid.type & QTDIR)
++		if(omode != OREAD)
++			error(Eperm);	/* directories can only be opened for reading */
++
++	co = devopen(c, omode, limitdir, nelem(limitdir), limgen);
++	/* the side effects below run only after devopen has returned */
++	switch(QID(c->qid.path)) {
++	case Qclone:
++		l = newlgrp(l);	/* create a child of the selected group ... */
++		switchlgrp(l);	/* ... and move the opening process into it */
++		break;
++	case Qswitch:
++		switchlgrp(l);	/* move the opening process into the selected group */
++		break;
++	}
++
++	return co;
++}
++
++static void	/* close: nothing is released here; the group is only looked up again */
++limclose(Chan *c)
++{
++	Lgrp *l = up->lgrp;
++	int lgid = LGID(c->qid.path);
++
++	if(lgid)
++		l = getlgrp(lgid);
++	if(l == nil)
++		error(Enonexist);	/* NOTE(review): l is otherwise unused, and raising from close looks unintended — confirm */
++}
++
++static void
++limremove(Chan*)
++{
++	error(Eperm);	/* nothing on this device can be removed */
++}
++
++static long	/* read: directory listings, the group id (ctl) or the status text; clone and switch read as empty */
++limread(Chan *c, void *va, long n, vlong off)
++{
++	Lgrp *l = up->lgrp;	/* group id 0 in the qid means the caller's own group */
++	char *buf, *p;
++	long m;
++	int lgid = LGID(c->qid.path);
++
++	if(lgid)
++		l = getlgrp(lgid);
++	if(l == nil)
++		error(Enonexist);
++
++	switch(QID(c->qid.path)){
++	case Qroot:
++		return devdirread(c, va, n, limitdir, nelem(limitdir), limgen);
++	case Qctl:
++		rlock(l);
++		m = readnum((ulong) off, va, n, l->lgid, NUMSIZE);
++		runlock(l);
++		return m;
++		break;
++	case Qclone:
++	case Qswitch:
++		return readstr((ulong) off, va, n, "");
++		break;
++	case Qstatus:
++		buf = malloc(StatusSize);
++		if(buf == nil) error(Enomem);	/* checked before any lock is taken */
++		rlock(l);
++		p = seprint(buf, buf + StatusSize, "%.*s\n", LabelSize - 1, l->label ? l->label : "");	/* label line, empty if unset */
++		p = seprint(p, buf + StatusSize, "%*lud %*lud\n", NUMSIZE-1, l->mlim, NUMSIZE-1, l->clim);	/* maximum, then current */
++		p = seprint(p, buf + StatusSize, "%*lud %*lud\n", NUMSIZE-1, l->mproc, NUMSIZE-1, l->cproc);
++		seprint(p, buf + StatusSize, "%*lud %*lud\n", NUMSIZE-1, l->mpage, NUMSIZE-1, l->cpage);	/* appended via p: buf is never both source and destination */
++		runlock(l);
++		m = readstr((ulong) off, va, n, buf);
++		free(buf);
++		return m;
++	case Qgroups:
++		if(lgid != 0)
++			error(Eperm);	/* only the root directory has a groups listing */
++		return devdirread(c, va, n, limitdir, nelem(limitdir), limgen);
++	default:
++		error(Eperm);
++		break;
++	}
++}
++
++static long	/* write: only ctl accepts writes; each write carries one command (label, mlim, mproc or mpage) */
++limwrite(Chan *c, void *va, long n, vlong)
++{
++	Lgrp *l = up->lgrp;	/* group id 0 in the qid means the caller's own group */
++	Cmdbuf *cb;
++	Cmdtab *ct;
++	char *label, *newm;
++	long m;
++	int lgid = LGID(c->qid.path);
++
++	if(lgid)
++		l = getlgrp(lgid);
++	if(l == nil)
++		error(Enonexist);
++
++	switch(QID(c->qid.path)){
++	case Qctl:
++		cb = parsecmd(va, n);
++		if(waserror()) {
++			free(cb);	/* release the parsed command on any error below */
++			nexterror();
++		}
++		ct = lookupcmd(cb, limitcmd, nelem(limitcmd));
++		if(ct == nil)
++			error(Ebadctl);
++
++		switch(ct->index) {
++		case CMlabel:
++			label = cb->f[1];
++			if(strlen(label) > LabelSize - 1)
++				error(Eperm);
++			wlock(l);
++			kstrdup(&l->label, label);
++			wunlock(l);
++			break;
++		case CMsetmlim:
++			newm = cb->f[1];
++			m = atoi(newm);
++			if(m <= 0)	/* 0 means "no maximum" to inclimit; a negative value would wrap to a huge ulong */
++				error(Ebadctl);
++			wlock(l);
++			l->mlim = m;
++			wunlock(l);
++			break;
++		case CMsetmproc:
++			newm = cb->f[1];
++			m = atoi(newm);
++			if(m <= 0)	/* reject zero, non-numeric and negative values alike */
++				error(Ebadctl);
++			wlock(l);
++			l->mproc = m;
++			wunlock(l);
++			break;
++		case CMsetmpage:
++			newm = cb->f[1];
++			m = atoi(newm);
++			if(m <= 0)	/* reject zero, non-numeric and negative values alike */
++				error(Ebadctl);
++			wlock(l);
++			l->mpage = m;
++			wunlock(l);
++			break;
++		default:
++			error(Ebadctl);
++			break;
++		}
++		free(cb);
++		poperror();
++		break;
++	default:
++		error(Eperm);
++		break;
++	}
++
++	return n;
++}
++
++Dev limitdevtab = {
++	L'Λ',	/* device character, the same one limattach passes to devattach */
++	"limit",
++
++	devreset,	/* the dev* entries are defined outside this file (presumably the generic dev.c defaults) */
++	devinit,
++	devshutdown,
++	limattach,
++	limwalk,
++	limstat,
++	limopen,
++	devcreate,
++	limclose,
++	limread,
++	devbread,
++	limwrite,
++	devbwrite,
++	limremove,
++	devwstat,
++};
++
++/* helper functions */
++Lgrp*	/* look up a group by id; nil when the id is out of range or unused.  No reference is taken. */
++getlgrp(int lgid)
++{
++	Lgrp *l = nil;
++	lock(&lgrptablock);
++	if(lgid >= 1 && lgid <= LIMMAX) l = lgrptab[lgid - 1];	/* ids are 1-based; never index outside lgrptab */
++	unlock(&lgrptablock);
++	return l;
++}
++
++void	/* make l the calling process's limit group */
++switchlgrp(Lgrp *l)
++{
++	Lgrp *o = up->lgrp;
++	incref(l);	/* taken before the old reference is dropped, so switching to the current group cannot free it */
++	up->lgrp = l;
++	closelgrp(o);	/* NOTE(review): cproc is not moved from o to l here, while the exit hunk in proc.c decrements whatever group is current — confirm the accounting */
++}
+--- /dev/null
++++ b/sys/src/9/port/limit.c
+@@ -1,0 +1,216 @@
++#include "u.h"
++#include "../port/lib.h"
++#include "mem.h"
++#include "dat.h"
++#include "fns.h"
++#include "../port/error.h"
++
++Lgrp *lgrptab[LIMMAX] = { nil };
++Lock lgrptablock;
++
++/* helper functions */
++void	/* append new to parent's child array and take a reference on parent on the child's behalf */
++addchild(Lgrp *parent, Lgrp *new)
++{
++	wlock(parent);
++	incref(parent);	/* dropped again by removechild */
++	if(parent->submax == 0 || parent->subgrp == nil) {
++		parent->submax = LIMINISUB;
++		parent->subgrp = malloc(LIMINISUB * sizeof(Lgrp*));	/* sized in elements; the old byte count overflowed the array */
++	}
++	if(parent->subcount + 1 > parent->submax) {
++		parent->subgrp = realloc(parent->subgrp, parent->submax * 2 * sizeof(Lgrp*));	/* double the capacity, again in elements */
++		parent->submax *= 2;
++	}
++	parent->subgrp[parent->subcount] = new;	/* NOTE(review): the malloc/realloc results are still not checked for nil */
++	parent->subcount++;
++	wunlock(parent);
++}
++
++void
++removechild(Lgrp *old)
++{
++	Lgrp *p;
++	uint k, n;
++	/* take old out of its parent's child array and drop the reference addchild took */
++	p = old->parent;
++	if(p == nil)
++		return;
++	wlock(p);
++	n = p->subcount;
++	for(k = 0; k < n && p->subgrp[k] != old; k++)
++		;
++	if(k == n) {	/* old is not registered with p: leave p untouched */
++		wunlock(p);
++		return;
++	}
++	for(k++; k < n; k++)	/* shift the tail down over the vacated slot */
++		p->subgrp[k - 1] = p->subgrp[k];
++	p->subcount = n - 1;
++	wunlock(p);
++	closelgrp(p);
++}
++
++/* kernel functions */
++Lgrp*
++newlgrp(Lgrp *parent)
++{
++	Lgrp *l;
++	int lgid;
++
++	/*
++	 * Create a limit group below parent (nil for the first group), charge it
++	 * to the parent's LTLIM count and enter it in lgrptab.  The new group's
++	 * Ref starts at zero; the caller takes the first reference (see switchlgrp).
++	 * Allocation, the limit check and the copy from parent are done before
++	 * lgrptablock is taken: it is a spin Lock, and the group is complete
++	 * by the time getlgrp() can find it.
++	 */
++	l = malloc(sizeof(Lgrp));
++	if(l == nil)
++		error(Enomem);
++	memset(l, 0, sizeof(Lgrp));	/* counters, limits, label, uid/gid, Ref and RWlock start out zero */
++	if(parent != nil) {
++		if(waserror()) {
++			free(l);
++			nexterror();
++		}
++		inclimit(LTLIM, parent, 1);	/* raises "limit reached" if parent or an ancestor is full */
++		poperror();
++		rlock(parent);
++		l->mlim = parent->mlim;	/* a child starts with its parent's maxima */
++		l->mproc = parent->mproc;
++		l->mpage = parent->mpage;
++		runlock(parent);
++	}
++	l->parent = parent;
++	kstrdup(&l->uid, up->user ? up->user : eve);	/* owned by the creating user, or eve if none */
++	kstrdup(&l->gid, up->user ? up->user : eve);
++
++	lock(&lgrptablock);
++	for(lgid = 0; lgid < LIMMAX; lgid++)
++		if(lgrptab[lgid] == nil)
++			break;
++	if(lgid == LIMMAX) {
++		unlock(&lgrptablock);
++		if(parent != nil)
++			declimit(LTLIM, parent, 1);	/* give back the charge taken above */
++		free(l->uid);
++		free(l->gid);
++		free(l);
++		error("system has reached the maximum amount of limits");
++	}
++	l->lgid = lgid + 1;	/* ids are 1-based indices into lgrptab */
++	lgrptab[lgid] = l;
++	unlock(&lgrptablock);
++
++	if(parent != nil)
++		addchild(parent, l);	/* records the child and takes a reference on parent */
++	return l;
++}
++
++void
++inclimit(int limit, Lgrp *l, int q)
++{
++	Lgrp *g;
++	ulong have, cap;
++
++	/*
++	 * Charge q units of the given limit type (LTLIM, LTPROC or LTPAGE)
++	 * to l and to each of its ancestors.  The walk stops at a nil parent
++	 * or at the group with lgid 1, which is neither checked nor counted.
++	 *
++	 * First pass: raise "limit reached" if any group on the way up would
++	 * go past a non-zero maximum; nothing has been modified at that point.
++	 */
++	g = l;
++	while(g != nil && g->lgid != 1) {
++		rlock(g);
++		if(limit == LTLIM) {
++			have = g->clim;
++			cap = g->mlim;
++		} else if(limit == LTPROC) {
++			have = g->cproc;
++			cap = g->mproc;
++		} else if(limit == LTPAGE) {
++			have = g->cpage;
++			cap = g->mpage;
++		} else {
++			runlock(g);
++			error("unknown limit type");
++		}
++		if(cap != 0 && have + q > cap) {
++			runlock(g);
++			error("limit reached");
++		}
++		runlock(g);
++		g = g->parent;
++	}
++
++	/* small amounts over the limit can't hurt */
++	/* second pass: add q everywhere; the locks were dropped after the check above */
++	for(g = l; g != nil && g->lgid != 1; g = g->parent) {
++		wlock(g);
++		if(limit == LTLIM)
++			g->clim += q;
++		else if(limit == LTPROC)
++			g->cproc += q;
++		else if(limit == LTPAGE)
++			g->cpage += q;
++		else {
++			wunlock(g);
++			error("unknown limit type");
++		}
++		wunlock(g);
++	}
++}
++
++void
++declimit(int limit, Lgrp *l, int q)
++{
++	Lgrp *g;
++
++	/*
++	 * Give back q units of the given limit type on l and each ancestor,
++	 * stopping at a nil parent or at the group with lgid 1 (never counted).
++	 */
++	g = l;
++	while(g != nil && g->lgid != 1) {
++		wlock(g);
++		if(limit == LTLIM)
++			g->clim -= q;
++		else if(limit == LTPROC)
++			g->cproc -= q;
++		else if(limit == LTPAGE)
++			g->cpage -= q;
++		else {
++			wunlock(g);
++			error("unknown limit type");
++		}
++		wunlock(g);
++		g = g->parent;
++	}
++}
++
++void	/* drop one reference to l; the last reference unlinks the group and frees it */
++closelgrp(Lgrp* l)
++{
++	if(decref(l) != 0) return;	/* other references remain */
++	wlock(l);
++	if(waserror()) {
++		wunlock(l);
++		nexterror();
++	}
++	if(l->parent) declimit(LTLIM, l->parent, 1);	/* the parent has one child group fewer */
++	removechild(l);	/* unlink from the parent; this may close the parent in turn */
++	wunlock(l);
++	poperror();
++	lock(&lgrptablock);
++	lgrptab[l->lgid - 1] = nil;	/* the id becomes available to newlgrp again */
++	unlock(&lgrptablock);
++	if(l->subgrp) free(l->subgrp);	/* child array allocated by addchild; it used to be leaked */
++	if(l->label) free(l->label);
++	free(l->uid);
++	free(l->gid);
++	free(l);
++}
+--- a/sys/src/9/port/portdat.h
++++ b/sys/src/9/port/portdat.h
+@@ -14,6 +14,7 @@
+ typedef struct Image Image;
+ typedef struct Log Log;
+ typedef struct Logflag Logflag;
++typedef struct Lgrp Lgrp;
+ typedef struct Mntcache Mntcache;
+ typedef struct Mount Mount;
+ typedef struct Mntrah Mntrah;
+@@ -433,6 +434,7 @@
+ Pte *ssegmap[SSEGMAPSIZE];
+ Sema sema;
+ ulong mark; /* portcountrefs */
++ Lgrp *lgrp;
+ };
+
+ struct Segio
+@@ -538,6 +540,31 @@
+ DELTAFD = 20 /* incremental increase in Fgrp.fd's */
+ };
+
++struct Lgrp
++{
++	Ref;	/* counted by processes (sysproc.c), segments (segment.c) and child groups (addchild) */
++	RWlock;	/* taken around reads and writes of the fields below */
++	int lgid;	/* 1-based id; the group is stored in lgrptab[lgid-1] */
++	ulong clim, mlim;	/* current / maximum number of child groups; a maximum of 0 is not enforced */
++	ulong cproc, mproc;	/* current / maximum number of processes */
++	ulong cpage, mpage;	/* current / maximum segment size charged via LTPAGE */
++	char* label;	/* optional label set through ctl, nil if unset */
++	char* uid;	/* owner reported in directory entries */
++	char* gid;	/* group reported in directory entries */
++	Lgrp* parent;	/* nil for the group created with newlgrp(nil) */
++	uint subcount, submax;	/* used and allocated entries of subgrp */
++	Lgrp** subgrp;	/* child groups, maintained by addchild/removechild */
++};
++
++enum
++{
++	LIMMAX = 4096,	/* size of lgrptab, hence the maximum number of limit groups */
++	LIMINISUB = 8,	/* initial capacity of a group's subgrp array */
++	LTLIM = 0,	/* limit type: child groups (clim/mlim) */
++	LTPROC,	/* limit type: processes (cproc/mproc) */
++	LTPAGE,	/* limit type: segment size (cpage/mpage) */
++};
++
+ struct Palloc
+ {
+ Lock;
+@@ -692,6 +719,7 @@
+ Egrp *egrp; /* Environment group */
+ Fgrp *fgrp; /* File descriptor group */
+ Rgrp *rgrp; /* Rendez group */
++ Lgrp *lgrp; /* Limit group */
+
+ Fgrp *closingfgrp; /* used during teardown */
+
+--- a/sys/src/9/port/portfns.h
++++ b/sys/src/9/port/portfns.h
+@@ -34,6 +34,7 @@
+ void ccloseq(Chan*);
+ void closeegrp(Egrp*);
+ void closefgrp(Fgrp*);
++void closelgrp(Lgrp*);
+ void closepgrp(Pgrp*);
+ void closergrp(Rgrp*);
+ long clrfpintr(void);
+@@ -55,6 +56,7 @@
+ void cupdate(Chan*, uchar*, int, vlong);
+ void cwrite(Chan*, uchar*, int, vlong);
+ uintptr dbgpc(Proc*);
++void declimit(int, Lgrp*, int);
+ long decref(Ref*);
+ int decrypt(void*, void*, int);
+ void delay(int);
+@@ -141,6 +143,7 @@
+ void iunlock(Lock*);
+ ulong imagecached(void);
+ ulong imagereclaim(int);
++void inclimit(int, Lgrp*, int);
+ long incref(Ref*);
+ void init0(void);
+ void initseg(void);
+@@ -210,6 +213,7 @@
+ int needpages(void*);
+ Chan* newchan(void);
+ int newfd(Chan*, int);
++Lgrp* newlgrp(Lgrp*);
+ Mhead* newmhead(Chan*);
+ Mount* newmount(Chan*, int, char*);
+ Page* newpage(int, Segment **, uintptr);
+--- a/sys/src/9/port/proc.c
++++ b/sys/src/9/port/proc.c
+@@ -1243,6 +1243,7 @@
+ Egrp *egrp;
+ Rgrp *rgrp;
+ Pgrp *pgrp;
++ Lgrp *lgrp;
+ Chan *dot;
+ void (*pt)(Proc*, int, vlong);
+
+@@ -1262,6 +1263,8 @@
+ up->rgrp = nil;
+ pgrp = up->pgrp;
+ up->pgrp = nil;
++ lgrp = up->lgrp;
++ up->lgrp = nil;
+ dot = up->dot;
+ up->dot = nil;
+ qunlock(&up->debug);
+@@ -1276,6 +1279,10 @@
+ cclose(dot);
+ if(pgrp != nil)
+ closepgrp(pgrp);
++ if(lgrp != nil) {
++ declimit(LTPROC, lgrp, 1);
++ closelgrp(lgrp);
++ }
+
+ if(up->parentpid == 0){
+ if(exitstr == nil)
+--- a/sys/src/9/port/segment.c
++++ b/sys/src/9/port/segment.c
+@@ -56,6 +56,10 @@
+ s = malloc(sizeof(Segment));
+ if(s == nil)
+ error(Enomem);
++ if(waserror()) {
++ if(s) free(s);
++ nexterror();
++ }
+ s->ref = 1;
+ s->type = type;
+ s->base = base;
+@@ -70,13 +74,17 @@
+ return s;
+ }
+
++ if(up && up->lgrp) s->lgrp = up->lgrp;
++ if(s->lgrp != nil) {
++ inclimit(LTPAGE, s->lgrp, size);
++ incref(s->lgrp);
++ }
++
+ mapsize = ROUND(size, PTEPERTAB)/PTEPERTAB;
+ if(mapsize > nelem(s->ssegmap)){
+ s->map = malloc(mapsize*sizeof(Pte*));
+- if(s->map == nil){
+- free(s);
++ if(s->map == nil)
+ error(Enomem);
+- }
+ s->mapsize = mapsize;
+ }
+ else{
+@@ -84,6 +92,7 @@
+ s->mapsize = nelem(s->ssegmap);
+ }
+
++ poperror();
+ return s;
+ }
+
+@@ -122,6 +131,11 @@
+ free(s->map);
+ }
+
++ if(s->lgrp != nil) {
++ declimit(LTPAGE, s->lgrp, s->size);
++ closelgrp(s->lgrp);
++ }
++
+ if(s->profile != nil)
+ free(s->profile);
+
+@@ -409,13 +423,15 @@
+ return s->base;
+
+ qlock(s);
++ if(waserror()) {
++ qunlock(s);
++ nexterror();
++ }
+
+ /* We may start with the bss overlapping the data */
+ if(addr < s->base) {
+- if(seg != BSEG || up->seg[DSEG] == nil || addr < up->seg[DSEG]->base) {
+- qunlock(s);
++ if(seg != BSEG || up->seg[DSEG] == nil || addr < up->seg[DSEG]->base)
+ error(Enovmem);
+- }
+ addr = s->base;
+ }
+
+@@ -427,13 +443,13 @@
+ * to-be-freed address space may have been passed to the kernel
+ * already by another proc and is past the validaddr stage.
+ */
+- if(s->ref > 1){
+- qunlock(s);
++ if(s->ref > 1)
+ error(Einuse);
+- }
+ mfreeseg(s, newtop, (s->top-newtop)/BY2PG);
+ s->top = newtop;
+ s->size = newsize;
++ if(s->lgrp)
++ declimit(LTPAGE, s->lgrp, s->size - newsize);
+ qunlock(s);
+ flushmmu();
+ return 0;
+@@ -443,33 +459,39 @@
+ ns = up->seg[i];
+ if(ns == nil || ns == s)
+ continue;
+- if(newtop > ns->base && s->base < ns->top) {
+- qunlock(s);
++ if(newtop > ns->base && s->base < ns->top)
+ error(Esoverlap);
+- }
+ }
+
+- if(newsize > (SEGMAPSIZE*PTEPERTAB)) {
+- qunlock(s);
++ if(newsize > (SEGMAPSIZE*PTEPERTAB))
+ error(Enovmem);
+- }
++
+ mapsize = ROUND(newsize, PTEPERTAB)/PTEPERTAB;
+ if(mapsize > s->mapsize){
+ map = malloc(mapsize*sizeof(Pte*));
+- if(map == nil){
+- qunlock(s);
++ if(map == nil)
+ error(Enomem);
++ if(waserror()) {
++ free(map);
++ nexterror();
+ }
++ if(s->lgrp)
++ inclimit(LTPAGE, s->lgrp, newsize - s->size);
+ memmove(map, s->map, s->mapsize*sizeof(Pte*));
+ if(s->map != s->ssegmap)
+ free(s->map);
+ s->map = map;
+ s->mapsize = mapsize;
++ poperror();
++ } else {
++ if(s->lgrp)
++ inclimit(LTPAGE, s->lgrp, newsize - s->size);
+ }
+
+ s->top = newtop;
+ s->size = newsize;
+ qunlock(s);
++ poperror();
+ return 0;
+ }
+
+--- a/sys/src/9/port/sysproc.c
++++ b/sys/src/9/port/sysproc.c
+@@ -96,6 +96,8 @@
+ if((p = newproc()) == nil)
+ error("no procs");
+
++ inclimit(LTPROC, up->lgrp, 1);
++
+ qlock(&up->debug);
+ qlock(&p->debug);
+
+@@ -211,6 +213,10 @@
+ p->egrp = up->egrp;
+ incref(p->egrp);
+ }
++
++ /* Limit group */
++ p->lgrp = up->lgrp;
++ incref(p->lgrp);
+
+ procfork(p);
+
+--- a/sys/src/9/port/userinit.c
++++ b/sys/src/9/port/userinit.c
+@@ -33,6 +33,7 @@
+ up->egrp->ref = 1;
+ up->fgrp = dupfgrp(nil);
+ up->rgrp = newrgrp();
++ up->lgrp = newlgrp(nil);
+
+ /*
+ * These are o.k. because rootinit is null.
+--- a/sys/src/9/ppc/mkfile
++++ b/sys/src/9/ppc/mkfile
+@@ -19,6 +19,7 @@
+ dev.$O\
+ edf.$O\
+ fault.$O\
++ limit.$O\
+ log.$O\
+ rebootcmd.$O\
+ page.$O\
+--- a/sys/src/9/sgi/mkfile
++++ b/sys/src/9/sgi/mkfile
+@@ -26,6 +26,7 @@
+ edf.$O\
+ fault.$O\
+ fptrap.$O\
++ limit.$O\
+ mul64fract.$O\
+ page.$O\
+ parse.$O\
+--- a/sys/src/9/teg2/mkfile
++++ b/sys/src/9/teg2/mkfile
+@@ -23,6 +23,7 @@
+ dev.$O\
+ edf.$O\
+ fault.$O\
++ limit.$O\
+ mul64fract.$O\
+ rebootcmd.$O\
+ page.$O\
+--- a/sys/src/9/xen/mkfile
++++ b/sys/src/9/xen/mkfile
+@@ -23,6 +23,7 @@
+ edf.$O\
+ fault.$O\
+ iomap.$O\
++ limit.$O\
+ page.$O\
+ parse.$O\
+ pgrp.$O\
+--- a/sys/src/9/zynq/mkfile
++++ b/sys/src/9/zynq/mkfile
+@@ -20,6 +20,7 @@
+ dev.$O\
+ edf.$O\
+ fault.$O\
++ limit.$O\
+ mul64fract.$O\
+ rebootcmd.$O\
+ page.$O\
--- /dev/null
+++ b/README
@@ -1,0 +1,121 @@
+
+
+
+ LIMIT(3) LIMIT(3)
+
+
+
+
+
+ NAME
+ limit - system limit interface
+
+ SYNOPSIS
+ bind #Λ /mnt/limit
+
+ /mnt/limit/ctl
+ /mnt/limit/clone
+ /mnt/limit/switch
+ /mnt/limit/status
+ /mnt/limit/groups/...
+
+ DESCRIPTION
+ The #Λ device provides the interface to the system's limit
+ mechanism. Each process has a limit group assigned. Groups
+ are created as children of other groups, are represented by
+ an id, and follow the restrictions imposed by their parents.
+ A group is deleted the moment there are no more processes or
+ segments referring to it. The group is owned by the uname
+ and gid of the creator process, and the attributes can be
+ changed. The system's group, with id 1, imposes no restric-
+ tions and does not increase counters.
+
+ The root directory contains the ctl, clone, switch, status
+ files, and a directory named groups. The current process'
+ limit group is always presented in the root directory, and
+ other groups, in the groups subdirectory, where they have
+          the same files, except for the groups directory.
+
+ The ctl file controls the limit group's attributes, namely,
+ the label, and the limits. Reading the file returns the
+ limit group's id. The following commands are supported:
+
+
+ label label Set the group's label; the labels need
+ not be unique.
+
+ mlim number Set the maximum amount of groups that
+ can be created from this group.
+
+ mproc number Set the maximum amount of processes that
+ can be forked inside this group.
+
+ mpage number Set the maximum total amount of pages
+ that segments inside the limit group can
+ have.
+
+          Reading the status file shows the group's attributes: the
+          label (or an empty line if not set), followed by one line
+          per limit with the current limit restriction on the left
+          and the count on the right.
+
+ To create a new group, the clone file should be opened.
+ Reading from the fid returns the newly created group's id.
+ This creates a new reference, and when the fid is clunked,
+ the reference is destroyed. A reference is also created for
+          the parent limit group. The opening process is also
+          switched into the newly created group.
+
+          To switch to an existing limit group, the switch file in
+          that group's directory should be opened.
+
+          The following limits exist and are imposed: limit limits,
+          the maximum number of groups which can be "forked" from this
+          group; process limits, the maximum number of processes in the
+          group; and page limits, the maximum number of pages its segments
+          can have. The kernel already imposes limits on the number of files
+          which can be opened, so implementing that should be redundant.
+
+ EXAMPLES
+ To create a new limit group and restrict the amount of sub-
+ limits and processes:
+
+ % <>[10] /mnt/limit/clone
+ % echo label mygroup >/mnt/limit/ctl
+ % echo mlim 1 >/mnt/limit/ctl
+ % echo mproc 20 >/mnt/limit/ctl
+
+ Snippet to attach to an existing limit group, looking for a
+ specific label:
+
+ #!/bin/rc
+
+ slabel=$1
+ shift
+ prog=$*
+
+ bind '#Λ' /mnt/limit
+
+ for (lgrpstat in `{walk -f /mnt/limit/groups | grep 'status$'}) {
+ lgrpdir=`{basename -d $lgrpstat}
+ lgrplabel=`{cat $lgrpstat | sed 1q}
+ if(~ $lgrplabel $slabel) {
+ <>[10] $lgrpdir/switch
+ exec $prog
+ }
+ }
+
+ echo lgrp $slabel not found
+ exit nolgrp
+
+ SOURCE
+ /sys/src/9/port/limit.c
+ /sys/src/9/port/devlimit.c
+
+ BUGS
+          Not all processes use pexit, probably the ones which are
+ created during boot. This has to be checked.
+
+ Spawning a lot of processes inside a limit group which is
+ not the root group can cause a kernel panic related to run-
+ lock. Needs to be investigated.