ref: 3f71a1c946093b8d2390cc2f738063cb75bb5a17
parent: ac03b95b184b70f3fafc8f285bc2c4a796698035
	author: Tor Andersson <tor.andersson@artifex.com>
	date: Wed Aug 31 10:10:14 EDT 2022
	
Fast path for "simple" arrays. An array without holes and with only integer properties can be represented with a "flat" array part that allows for O(1) property access. If we ever add a non-integer property, create holes in the array, the whole array is unpacked into a normal string-keyed object. Also add fast integer indexing to be used on these arrays, before falling back to converting the integer to a string property lookup. Use JS_ARRAYLIMIT to restrict size of arrays to avoid integer overflows and out of memory thrashing.
--- a/jsarray.c
+++ b/jsarray.c
@@ -17,30 +17,6 @@
js_setproperty(J, idx < 0 ? idx - 1 : idx, "length");
}
-int js_hasindex(js_State *J, int idx, int i)
-{- char buf[32];
- return js_hasproperty(J, idx, js_itoa(buf, i));
-}
-
-void js_getindex(js_State *J, int idx, int i)
-{- char buf[32];
- js_getproperty(J, idx, js_itoa(buf, i));
-}
-
-void js_setindex(js_State *J, int idx, int i)
-{- char buf[32];
- js_setproperty(J, idx, js_itoa(buf, i));
-}
-
-void js_delindex(js_State *J, int idx, int i)
-{- char buf[32];
- js_delproperty(J, idx, js_itoa(buf, i));
-}
-
static void jsB_new_Array(js_State *J)
 {int i, top = js_gettop(J);
--- a/jsgc.c
+++ b/jsgc.c
@@ -42,6 +42,8 @@
js_free(J, obj->u.r.source);
js_regfreex(J->alloc, J->actx, obj->u.r.prog);
}
+ if (obj->type == JS_CARRAY && obj->u.a.simple)
+ js_free(J, obj->u.a.array);
if (obj->type == JS_CITERATOR)
jsG_freeiterator(J, obj->u.iter.head);
if (obj->type == JS_CUSERDATA && obj->u.user.finalize)
@@ -100,6 +102,16 @@
jsG_markproperty(J, mark, obj->properties);
if (obj->prototype && obj->prototype->gcmark != mark)
jsG_markobject(J, mark, obj->prototype);
+	if (obj->type == JS_CARRAY && obj->u.a.simple) {+ int i;
+		for (i = 0; i < obj->u.a.length; ++i) {+ js_Value *v = &obj->u.a.array[i];
+ if (v->type == JS_TMEMSTR && v->u.memstr->gcmark != mark)
+ v->u.memstr->gcmark = mark;
+ if (v->type == JS_TOBJECT && v->u.object->gcmark != mark)
+ jsG_markobject(J, mark, v->u.object);
+ }
+ }
 	if (obj->type == JS_CITERATOR && obj->u.iter.target->gcmark != mark) {jsG_markobject(J, mark, obj->u.iter.target);
}
--- a/jsi.h
+++ b/jsi.h
@@ -86,6 +86,9 @@
#ifndef JS_TRYLIMIT
#define JS_TRYLIMIT 64 /* exception stack size */
#endif
+#ifndef JS_ARRAYLIMIT
+#define JS_ARRAYLIMIT (1<<26) /* limit arrays to 64M entries (1G of flat array data) */
+#endif
#ifndef JS_GCFACTOR
/*
* GC will try to trigger when memory usage is this value times the minimum
--- a/jsobject.c
+++ b/jsobject.c
@@ -62,7 +62,24 @@
 {js_Object *self = js_toobject(J, 0);
const char *name = js_tostring(J, 1);
- js_Property *ref = jsV_getownproperty(J, self, name);
+ js_Property *ref;
+ int k;
+
+	if (self->type == JS_CSTRING) {+		if (js_isarrayindex(J, name, &k) && k >= 0 && k < self->u.s.length) {+ js_pushboolean(J, 1);
+ return;
+ }
+ }
+
+	if (self->type == JS_CARRAY && self->u.a.simple) {+		if (js_isarrayindex(J, name, &k) && k >= 0 && k < self->u.a.length) {+ js_pushboolean(J, 1);
+ return;
+ }
+ }
+
+ ref = jsV_getownproperty(J, self, name);
js_pushboolean(J, ref != NULL);
}
@@ -110,9 +127,10 @@
js_typeerror(J, "not an object");
obj = js_toobject(J, 1);
ref = jsV_getproperty(J, obj, js_tostring(J, 2));
- if (!ref)
+	if (!ref) {+ // TODO: builtin properties (string and array index and length, regexp flags, etc)
js_pushundefined(J);
-	else {+	} else {js_newobject(J);
 		if (!ref->getter && !ref->setter) {js_pushvalue(J, ref->value);
@@ -152,6 +170,7 @@
static void O_getOwnPropertyNames(js_State *J)
 {js_Object *obj;
+ char name[32];
int k;
int i;
@@ -169,6 +188,13 @@
 	if (obj->type == JS_CARRAY) {js_pushliteral(J, "length");
js_setindex(J, -2, i++);
+		if (obj->u.a.simple) {+			for (k = 0; k < obj->u.a.length; ++k) {+ js_itoa(name, k);
+ js_pushstring(J, name);
+ js_setindex(J, -2, i++);
+ }
+ }
}
 	if (obj->type == JS_CSTRING) {@@ -175,7 +201,8 @@
js_pushliteral(J, "length");
js_setindex(J, -2, i++);
 		for (k = 0; k < obj->u.s.length; ++k) {- js_pushnumber(J, k);
+ js_itoa(name, k);
+ js_pushstring(J, name);
js_setindex(J, -2, i++);
}
}
@@ -359,9 +386,12 @@
static void O_preventExtensions(js_State *J)
 {+ js_Object *obj;
if (!js_isobject(J, 1))
js_typeerror(J, "not an object");
- js_toobject(J, 1)->extensible = 0;
+ obj = js_toobject(J, 1);
+ jsR_unflattenarray(J, obj);
+ obj->extensible = 0;
js_copy(J, 1);
}
@@ -389,6 +419,7 @@
js_typeerror(J, "not an object");
obj = js_toobject(J, 1);
+ jsR_unflattenarray(J, obj);
obj->extensible = 0;
if (obj->properties->level)
@@ -446,6 +477,7 @@
js_typeerror(J, "not an object");
obj = js_toobject(J, 1);
+ jsR_unflattenarray(J, obj);
obj->extensible = 0;
if (obj->properties->level)
--- a/jsproperty.c
+++ b/jsproperty.c
@@ -1,6 +1,8 @@
#include "jsi.h"
#include "jsvalue.h"
+#include <assert.h>
+
/*
Use an AA-tree to quickly look up properties in objects:
@@ -226,6 +228,21 @@
/* Flatten hierarchy of enumerable properties into an iterator object */
+static js_Iterator *itnewnode(js_State *J, const char *name, js_Iterator *next) {+ js_Iterator *node = js_malloc(J, offsetof(js_Iterator, buf));
+ node->name = name;
+ node->next = next;
+ return node;
+}
+
+static js_Iterator *itnewnodeix(js_State *J, int ix) {+ js_Iterator *node = js_malloc(J, sizeof(js_Iterator));
+ js_itoa(node->buf, ix);
+ node->name = node->buf;
+ node->next = NULL;
+ return node;
+}
+
static js_Iterator *itwalk(js_State *J, js_Iterator *iter, js_Property *prop, js_Object *seen)
 {if (prop->right != &sentinel)
@@ -232,10 +249,7 @@
iter = itwalk(J, iter, prop->right, seen);
 	if (!(prop->atts & JS_DONTENUM)) { 		if (!seen || !jsV_getenumproperty(J, seen, prop->name)) {- js_Iterator *head = js_malloc(J, sizeof *head);
- head->name = prop->name;
- head->next = iter;
- iter = head;
+ iter = itnewnode(J, prop->name, iter);
}
}
if (prop->left != &sentinel)
@@ -266,6 +280,7 @@
 	} else {io->u.iter.head = itflatten(J, obj);
}
+
 	if (obj->type == JS_CSTRING) {js_Iterator *tail = io->u.iter.head;
if (tail)
@@ -274,9 +289,7 @@
 		for (k = 0; k < obj->u.s.length; ++k) {js_itoa(buf, k);
 			if (!jsV_getenumproperty(J, obj, buf)) {- js_Iterator *node = js_malloc(J, sizeof *node);
- node->name = js_intern(J, js_itoa(buf, k));
- node->next = NULL;
+ js_Iterator *node = itnewnodeix(J, k);
if (!tail)
io->u.iter.head = tail = node;
 				else {@@ -286,6 +299,23 @@
}
}
}
+
+	if (obj->type == JS_CARRAY && obj->u.a.simple) {+ js_Iterator *tail = io->u.iter.head;
+ if (tail)
+ while (tail->next)
+ tail = tail->next;
+		for (k = 0; k < obj->u.a.length; ++k) {+ js_Iterator *node = itnewnodeix(J, k);
+ if (!tail)
+ io->u.iter.head = tail = node;
+			else {+ tail->next = node;
+ tail = node;
+ }
+ }
+ }
+
return io;
}
@@ -304,6 +334,9 @@
if (io->u.iter.target->type == JS_CSTRING)
if (js_isarrayindex(J, name, &k) && k < io->u.iter.target->u.s.length)
return name;
+ if (io->u.iter.target->type == JS_CARRAY && io->u.iter.target->u.a.simple)
+ if (js_isarrayindex(J, name, &k) && k < io->u.iter.target->u.a.length)
+ return name;
}
return NULL;
}
@@ -315,6 +348,7 @@
char buf[32];
const char *s;
int k;
+ assert(!obj->u.a.simple);
 	if (newlen < obj->u.a.length) { 		if (obj->u.a.length > obj->count * 2) {js_Object *it = jsV_newiterator(J, obj, 1);
--- a/jsrun.c
+++ b/jsrun.c
@@ -5,6 +5,8 @@
#include "utf.h"
+#include <assert.h>
+
static void jsR_run(js_State *J, js_Function *F);
/* Push values on stack */
@@ -517,6 +519,28 @@
}
}
+void jsR_unflattenarray(js_State *J, js_Object *obj) {+	if (obj->type == JS_CARRAY && obj->u.a.simple) {+ js_Property *ref;
+ int i;
+ char name[32];
+		if (js_try(J)) {+ obj->properties = NULL;
+ js_throw(J);
+ }
+		for (i = 0; i < obj->u.a.length; ++i) {+ js_itoa(name, i);
+ ref = jsV_setproperty(J, obj, name);
+ ref->value = obj->u.a.array[i];
+ }
+ js_free(J, obj->u.a.array);
+ obj->u.a.simple = 0;
+ obj->u.a.capacity = 0;
+ obj->u.a.array = NULL;
+ js_endtry(J);
+ }
+}
+
static int jsR_hasproperty(js_State *J, js_Object *obj, const char *name)
 {js_Property *ref;
@@ -527,6 +551,14 @@
js_pushnumber(J, obj->u.a.length);
return 1;
}
+		if (obj->u.a.simple) {+			if (js_isarrayindex(J, name, &k)) {+				if (k >= 0 && k < obj->u.a.length) {+ js_pushvalue(J, obj->u.a.array[k]);
+ return 1;
+ }
+ }
+ }
}
 	else if (obj->type == JS_CSTRING) {@@ -591,6 +623,45 @@
js_pushundefined(J);
}
+static int jsR_hasindex(js_State *J, js_Object *obj, int k)
+{+ char buf[32];
+	if (obj->type == JS_CARRAY && obj->u.a.simple && k >= 0 && k < obj->u.a.length) {+ js_pushvalue(J, obj->u.a.array[k]);
+ return 1;
+ }
+ return jsR_hasproperty(J, obj, js_itoa(buf, k));
+}
+
+static void jsR_getindex(js_State *J, js_Object *obj, int k)
+{+ if (!jsR_hasindex(J, obj, k))
+ js_pushundefined(J);
+}
+
+static void jsR_setarrayindex(js_State *J, js_Object *obj, int k, js_Value *value)
+{+ int newlen = k + 1;
+ assert(obj->u.a.simple);
+ assert(k >= 0);
+ if (newlen > JS_ARRAYLIMIT)
+ js_rangeerror(J, "array too large");
+	if (newlen > obj->u.a.length) {+ assert(newlen == obj->u.a.length + 1);
+		if (newlen > obj->u.a.capacity) {+ int newcap = obj->u.a.capacity;
+ if (newcap == 0)
+ newcap = 8;
+ while (newcap < newlen)
+ newcap <<= 1;
+ obj->u.a.array = js_realloc(J, obj->u.a.array, newcap * sizeof(js_Value));
+ obj->u.a.capacity = newcap;
+ }
+ obj->u.a.length = newlen;
+ }
+ obj->u.a.array[k] = *value;
+}
+
static void jsR_setproperty(js_State *J, js_Object *obj, const char *name, int transient)
 {js_Value *value = stackidx(J, -1);
@@ -604,12 +675,30 @@
int newlen = jsV_numbertointeger(rawlen);
if (newlen != rawlen || newlen < 0)
js_rangeerror(J, "invalid array length");
+ if (newlen > JS_ARRAYLIMIT)
+ js_rangeerror(J, "array too large");
+			if (obj->u.a.simple) {+				if (newlen <= obj->u.a.length) {+ obj->u.a.length = newlen;
+ return;
+ }
+ jsR_unflattenarray(J, obj);
+ }
jsV_resizearray(J, obj, newlen);
return;
}
- if (js_isarrayindex(J, name, &k))
- if (k >= obj->u.a.length)
+
+		if (js_isarrayindex(J, name, &k)) {+			if (obj->u.a.simple) {+				if (k >= 0 && k <= obj->u.a.length) {+ jsR_setarrayindex(J, obj, k, value);
+ return;
+ }
+ jsR_unflattenarray(J, obj);
+ }
+ if (k + 1 > obj->u.a.length)
obj->u.a.length = k + 1;
+ }
}
 	else if (obj->type == JS_CSTRING) {@@ -679,6 +768,16 @@
js_typeerror(J, "'%s' is read-only", name);
}
+static void jsR_setindex(js_State *J, js_Object *obj, int k, int transient)
+{+ char buf[32];
+	if (obj->type == JS_CARRAY && obj->u.a.simple && k >= 0 && k <= obj->u.a.length) {+ jsR_setarrayindex(J, obj, k, stackidx(J, -1));
+	} else {+ jsR_setproperty(J, obj, js_itoa(buf, k), transient);
+ }
+}
+
static void jsR_defproperty(js_State *J, js_Object *obj, const char *name,
int atts, js_Value *value, js_Object *getter, js_Object *setter,
int throw)
@@ -689,6 +788,8 @@
 	if (obj->type == JS_CARRAY) {if (!strcmp(name, "length"))
goto readonly;
+ if (obj->u.a.simple)
+ jsR_unflattenarray(J, obj);
}
 	else if (obj->type == JS_CSTRING) {@@ -750,6 +851,8 @@
 	if (obj->type == JS_CARRAY) {if (!strcmp(name, "length"))
goto dontconf;
+ if (obj->u.a.simple)
+ jsR_unflattenarray(J, obj);
}
 	else if (obj->type == JS_CSTRING) {@@ -889,6 +992,28 @@
return jsR_hasproperty(J, js_toobject(J, idx), name);
}
+void js_getindex(js_State *J, int idx, int i)
+{+ jsR_getindex(J, js_toobject(J, idx), i);
+}
+
+int js_hasindex(js_State *J, int idx, int i)
+{+ return jsR_hasindex(J, js_toobject(J, idx), i);
+}
+
+void js_setindex(js_State *J, int idx, int i)
+{+ jsR_setindex(J, js_toobject(J, idx), i, !js_isobject(J, idx));
+ js_pop(J, 1);
+}
+
+void js_delindex(js_State *J, int idx, int i)
+{+ char buf[32];
+ js_delproperty(J, idx, js_itoa(buf, i));
+}
+
/* Iterator */
void js_pushiterator(js_State *J, int idx, int own)
@@ -1360,6 +1485,16 @@
js_stacktrace(J);
}
+static int jsR_isindex(js_State *J, int idx, int *k)
+{+ js_Value *v = stackidx(J, idx);
+	if (v->type == JS_TNUMBER) {+ *k = v->u.number;
+ return *k == v->u.number && *k >= 0;
+ }
+ return 0;
+}
+
static void jsR_run(js_State *J, js_Function *F)
 {js_Function **FT = F->funtab;
@@ -1532,9 +1667,14 @@
break;
case OP_GETPROP:
- str = js_tostring(J, -1);
- obj = js_toobject(J, -2);
- jsR_getproperty(J, obj, str);
+			if (jsR_isindex(J, -1, &ix)) {+ obj = js_toobject(J, -2);
+ jsR_getindex(J, obj, ix);
+			} else {+ str = js_tostring(J, -1);
+ obj = js_toobject(J, -2);
+ jsR_getproperty(J, obj, str);
+ }
js_rot3pop2(J);
break;
@@ -1546,10 +1686,16 @@
break;
case OP_SETPROP:
- str = js_tostring(J, -2);
- obj = js_toobject(J, -3);
- transient = !js_isobject(J, -3);
- jsR_setproperty(J, obj, str, transient);
+			if (jsR_isindex(J, -2, &ix)) {+ obj = js_toobject(J, -3);
+ transient = !js_isobject(J, -3);
+ jsR_setindex(J, obj, ix, transient);
+			} else {+ str = js_tostring(J, -2);
+ obj = js_toobject(J, -3);
+ transient = !js_isobject(J, -3);
+ jsR_setproperty(J, obj, str, transient);
+ }
js_rot3pop2(J);
break;
--- a/jsvalue.c
+++ b/jsvalue.c
@@ -423,7 +423,9 @@
void js_newarray(js_State *J)
 {- js_pushobject(J, jsV_newobject(J, JS_CARRAY, J->Array_prototype));
+ js_Object *obj = jsV_newobject(J, JS_CARRAY, J->Array_prototype);
+ obj->u.a.simple = 1;
+ js_pushobject(J, obj);
}
void js_newboolean(js_State *J, int v)
--- a/jsvalue.h
+++ b/jsvalue.h
@@ -93,6 +93,9 @@
} s;
 		struct {int length;
+ int simple; // true if array has only non-sparse array properties
+ int capacity;
+ js_Value *array;
} a;
 		struct {js_Function *function;
@@ -140,6 +143,7 @@
 {const char *name;
js_Iterator *next;
+ char buf[12]; /* for integer iterators */
};
/* jsrun.c */
@@ -149,6 +153,7 @@
js_Object *js_toobject(js_State *J, int idx);
void js_pushvalue(js_State *J, js_Value v);
void js_pushobject(js_State *J, js_Object *v);
+void jsR_unflattenarray(js_State *J, js_Object *obj);
/* jsvalue.c */
int jsV_toboolean(js_State *J, js_Value *v);
@@ -158,7 +163,7 @@
js_Object *jsV_toobject(js_State *J, js_Value *v);
void jsV_toprimitive(js_State *J, js_Value *v, int preferred);
-const char *js_itoa(char buf[32], int a);
+const char *js_itoa(char *buf, int a);
double js_stringtofloat(const char *s, char **ep);
int jsV_numbertointeger(double n);
int jsV_numbertoint32(double n);
@@ -181,6 +186,9 @@
const char *jsV_nextiterator(js_State *J, js_Object *iter);
void jsV_resizearray(js_State *J, js_Object *obj, int newlen);
+
+void jsV_unflattenarray(js_State *J, js_Object *obj);
+void jsV_growarray(js_State *J, js_Object *obj);
/* jsdump.c */
void js_dumpobject(js_State *J, js_Object *obj);
--
⑨