shithub: tinygl

--- a/Raw_Demos/bigfont.c

+++ b/Raw_Demos/bigfont.c

@@ -3,7 +3,7 @@

 #include <string.h>

 #include "../include/GL/gl.h"

 #include "../src/font8x8_basic.h"

-#include "stringutil.h"

+#include "../include-demo/stringutil.h"

 char fillchar = '#';

 void render(char *bitmap, int x) {

--- a/Raw_Demos/gears.c

+++ b/Raw_Demos/gears.c

@@ -24,7 +24,7 @@

 #define CHAD_MATH_IMPL

 //Drags in Math and String (which are already dragged in above.)

-#include "include/3dMath.h"

+#include "../include-demo/3dMath.h"

 //Requires

/*

@@ -39,7 +39,7 @@

*/

 #define STBIW_ASSERT(x) /* a comment */

 #define STB_IMAGE_WRITE_IMPLEMENTATION

-#include "include/stb_image_write.h"

+#include "../include-demo/stb_image_write.h"

 typedef unsigned char uchar;

--- a/Raw_Demos/include/3dMath.h

+++ /dev/null

@@ -1,554 +1,0 @@

-/* Public Domain / CC0 C99 Vector Math Library

-*/

-#ifndef CHAD_MATH_H

-#define CHAD_MATH_H

-//#define CHAD_MATH_NO_ALIGN

-#ifndef CHAD_MATH_NO_ALIGN

-#include <stdalign.h>

-#define CHAD_ALIGN alignas(16)

-#else

-#define CHAD_ALIGN /*a comment*/

-#endif

-#include <math.h>

-#include <string.h>

-typedef float f_;

-typedef unsigned int uint;

-#define MAX(x,y) (x>y?x:y)

-#define MIN(x,y) (x<y?x:y)

-typedef struct {CHAD_ALIGN f_ d[3];} vec3;

-typedef struct {CHAD_ALIGN int d[3];} ivec3;

-typedef struct {CHAD_ALIGN f_ d[4];} vec4;

-typedef struct {CHAD_ALIGN f_ d[16];} mat4;

-//Collision detection

-//These Algorithms return the penetration vector into

-//the shape in the first argument

-//With depth of penetration in element 4

-//if depth of penetration is zero or lower then there is no penetration.

-typedef struct{

-	vec4 c;

-	vec3 e;

-}aabb;

-typedef aabb colshape; //c.d[3] determines if it's a sphere or box. 0 or less = box, greater than 0 = sphere

-static inline vec4 getrow( mat4 a,  uint index){

-	return (vec4){

-		.d[0]=a.d[0*4+index],

-		.d[1]=a.d[1*4+index],

-		.d[2]=a.d[2*4+index],

-		.d[3]=a.d[3*4+index]

-	};

-}

-static inline mat4 swapRowColumnMajor( mat4 in){

-	mat4 result;

-	vec4 t;

-	int i = 0;

-	t = getrow(in,i);

-	memcpy(result.d+i*4, t.d, 4*4);i++;

-	t = getrow(in,i);

-	memcpy(result.d+i*4, t.d, 4*4);i++;

-	t = getrow(in,i);

-	memcpy(result.d+i*4, t.d, 4*4);i++;

-	t = getrow(in,i);

-	memcpy(result.d+i*4, t.d, 4*4);

-	return result;

-}

-static inline vec4 getcol( mat4 a,  uint index){

-	return (vec4){

-		.d[0]=a.d[index*4+0],

-		.d[1]=a.d[index*4+1],

-		.d[2]=a.d[index*4+2],

-		.d[3]=a.d[index*4+3]

-	};

-}

-static inline mat4 scalemat4( vec4 s){

-	mat4 ret;

-	for(int i = 1; i < 16; i++)

-		ret.d[i]= 0.0;

-	ret.d[0*4 + 0] = s.d[0]; //x scale

-	ret.d[1*4 + 1] = s.d[1]; //y scale

-	ret.d[2*4 + 2] = s.d[2]; //z scale

-	ret.d[3*4 + 3] = s.d[3]; //w scale

-	return ret;

-}

-static inline int invmat4( mat4 m, mat4* invOut) //returns 1 if successful

-{

-    mat4 inv;

-    f_ det;

-    int i;

-    inv.d[0] = m.d[5]  * m.d[10] * m.d[15] -

-             m.d[5]  * m.d[11] * m.d[14] -

-             m.d[9]  * m.d[6]  * m.d[15] +

-             m.d[9]  * m.d[7]  * m.d[14] +

-             m.d[13] * m.d[6]  * m.d[11] -

-             m.d[13] * m.d[7]  * m.d[10];

-    inv.d[4] = -m.d[4]  * m.d[10] * m.d[15] +

-              m.d[4]  * m.d[11] * m.d[14] +

-              m.d[8]  * m.d[6]  * m.d[15] -

-              m.d[8]  * m.d[7]  * m.d[14] -

-              m.d[12] * m.d[6]  * m.d[11] +

-              m.d[12] * m.d[7]  * m.d[10];

-    inv.d[8] = m.d[4]  * m.d[9] * m.d[15] -

-             m.d[4]  * m.d[11] * m.d[13] -

-             m.d[8]  * m.d[5] * m.d[15] +

-             m.d[8]  * m.d[7] * m.d[13] +

-             m.d[12] * m.d[5] * m.d[11] -

-             m.d[12] * m.d[7] * m.d[9];

-    inv.d[12] = -m.d[4]  * m.d[9] * m.d[14] +

-               m.d[4]  * m.d[10] * m.d[13] +

-               m.d[8]  * m.d[5] * m.d[14] -

-               m.d[8]  * m.d[6] * m.d[13] -

-               m.d[12] * m.d[5] * m.d[10] +

-               m.d[12] * m.d[6] * m.d[9];

-    inv.d[1] = -m.d[1]  * m.d[10] * m.d[15] +

-              m.d[1]  * m.d[11] * m.d[14] +

-              m.d[9]  * m.d[2] * m.d[15] -

-              m.d[9]  * m.d[3] * m.d[14] -

-              m.d[13] * m.d[2] * m.d[11] +

-              m.d[13] * m.d[3] * m.d[10];

-    inv.d[5] = m.d[0]  * m.d[10] * m.d[15] -

-             m.d[0]  * m.d[11] * m.d[14] -

-             m.d[8]  * m.d[2] * m.d[15] +

-             m.d[8]  * m.d[3] * m.d[14] +

-             m.d[12] * m.d[2] * m.d[11] -

-             m.d[12] * m.d[3] * m.d[10];

-    inv.d[9] = -m.d[0]  * m.d[9] * m.d[15] +

-              m.d[0]  * m.d[11] * m.d[13] +

-              m.d[8]  * m.d[1] * m.d[15] -

-              m.d[8]  * m.d[3] * m.d[13] -

-              m.d[12] * m.d[1] * m.d[11] +

-              m.d[12] * m.d[3] * m.d[9];

-    inv.d[13] = m.d[0]  * m.d[9] * m.d[14] -

-              m.d[0]  * m.d[10] * m.d[13] -

-              m.d[8]  * m.d[1] * m.d[14] +

-              m.d[8]  * m.d[2] * m.d[13] +

-              m.d[12] * m.d[1] * m.d[10] -

-              m.d[12] * m.d[2] * m.d[9];

-    inv.d[2] = m.d[1]  * m.d[6] * m.d[15] -

-             m.d[1]  * m.d[7] * m.d[14] -

-             m.d[5]  * m.d[2] * m.d[15] +

-             m.d[5]  * m.d[3] * m.d[14] +

-             m.d[13] * m.d[2] * m.d[7] -

-             m.d[13] * m.d[3] * m.d[6];

-    inv.d[6] = -m.d[0]  * m.d[6] * m.d[15] +

-              m.d[0]  * m.d[7] * m.d[14] +

-              m.d[4]  * m.d[2] * m.d[15] -

-              m.d[4]  * m.d[3] * m.d[14] -

-              m.d[12] * m.d[2] * m.d[7] +

-              m.d[12] * m.d[3] * m.d[6];

-    inv.d[10] = m.d[0]  * m.d[5] * m.d[15] -

-              m.d[0]  * m.d[7] * m.d[13] -

-              m.d[4]  * m.d[1] * m.d[15] +

-              m.d[4]  * m.d[3] * m.d[13] +

-              m.d[12] * m.d[1] * m.d[7] -

-              m.d[12] * m.d[3] * m.d[5];

-    inv.d[14] = -m.d[0]  * m.d[5] * m.d[14] +

-               m.d[0]  * m.d[6] * m.d[13] +

-               m.d[4]  * m.d[1] * m.d[14] -

-               m.d[4]  * m.d[2] * m.d[13] -

-               m.d[12] * m.d[1] * m.d[6] +

-               m.d[12] * m.d[2] * m.d[5];

-    inv.d[3] = -m.d[1] * m.d[6] * m.d[11] +

-              m.d[1] * m.d[7] * m.d[10] +

-              m.d[5] * m.d[2] * m.d[11] -

-              m.d[5] * m.d[3] * m.d[10] -

-              m.d[9] * m.d[2] * m.d[7] +

-              m.d[9] * m.d[3] * m.d[6];

-    inv.d[7] = m.d[0] * m.d[6] * m.d[11] -

-             m.d[0] * m.d[7] * m.d[10] -

-             m.d[4] * m.d[2] * m.d[11] +

-             m.d[4] * m.d[3] * m.d[10] +

-             m.d[8] * m.d[2] * m.d[7] -

-             m.d[8] * m.d[3] * m.d[6];

-    inv.d[11] = -m.d[0] * m.d[5] * m.d[11] +

-               m.d[0] * m.d[7] * m.d[9] +

-               m.d[4] * m.d[1] * m.d[11] -

-               m.d[4] * m.d[3] * m.d[9] -

-               m.d[8] * m.d[1] * m.d[7] +

-               m.d[8] * m.d[3] * m.d[5];

-    inv.d[15] = m.d[0] * m.d[5] * m.d[10] -

-              m.d[0] * m.d[6] * m.d[9] -

-              m.d[4] * m.d[1] * m.d[10] +

-              m.d[4] * m.d[2] * m.d[9] +

-              m.d[8] * m.d[1] * m.d[6] -

-              m.d[8] * m.d[2] * m.d[5];

-    det = m.d[0] * inv.d[0] + m.d[1] * inv.d[4] + m.d[2] * inv.d[8] + m.d[3] * inv.d[12];

-    if (det == 0)

-        return 0;

-    det = 1.0 / det;

-    for (i = 0; i < 16; i++)

-        invOut->d[i] = inv.d[i] * det;

-    return 1;

-}

-static inline mat4 perspective( f_ fov,  f_ aspect,  f_ near,  f_ far){

-	mat4 ret;

-	f_ D2R = 3.14159265358979323 / 180.0;

-	f_ yScale = 1.0/tanf(D2R * fov/2);

-	f_ xScale = yScale/aspect;

-	f_ nearmfar = near-far;

-	ret.d[0*4+0] = xScale; 	ret.d[0*4+1]=0; 	ret.d[0*4+2]=0;					ret.d[0*4+3]=0;

-	ret.d[1*4+0]=0; 		ret.d[1*4+1]=yScale;ret.d[1*4+2]=0;					ret.d[1*4+3]=0;

-	ret.d[2*4+0]=0; 		ret.d[2*4+1]=0;		ret.d[2*4+2]=(far+near)/nearmfar;ret.d[2*4+3]=-1;

-	ret.d[3*4+0]=0; 		ret.d[3*4+1]=0;		ret.d[3*4+2]=2*far*near/nearmfar;ret.d[3*4+3]=0;

-	/*

-	ret.d[0*4+0] = xScale; 	ret.d[0*4+1]=0; 	ret.d[0*4+2]=0;						ret.d[0*4+3]=0;

-	ret.d[1*4+0]=0; 		ret.d[1*4+1]=yScale;ret.d[1*4+2]=0;						ret.d[1*4+3]=0;

-	ret.d[2*4+0]=0; 		ret.d[2*4+1]=0;		ret.d[2*4+2]=(far+near)/nearmfar;	ret.d[2*4+3]=2*far*near/nearmfar;

-	ret.d[3*4+0]=0; 		ret.d[3*4+1]=0;		ret.d[3*4+2]=-1;					ret.d[3*4+3]=0;

-	*/

-	return ret;

-}

-static inline vec3 viewport( uint xdim,  uint ydim,  vec3 input){

-	input.d[0] += 1;

-	input.d[1] += 1;

-	input.d[0] *= (f_)xdim / 2.0;

-	input.d[1] *= (f_)ydim / 2.0;

-	input.d[2] = (input.d[2])/2.0;

-	return input;

-}

-static inline mat4 rotate( vec3 rotation){

-	f_ a = rotation.d[0];

-	f_ b = rotation.d[1];

-	f_ c = rotation.d[2];

-	mat4 rm;

-	rm.d[0*4 + 0] = cosf(a)*cosf(b);

-	rm.d[1*4 + 0] = sinf(a)*cosf(b);

-	rm.d[2*4 + 0] = -sinf(b);

-	rm.d[0*4 + 1] = cosf(a)*sinf(b)*sinf(c)-sinf(a)*cosf(c);

-	rm.d[1*4 + 1] = sinf(a)*sinf(b)*sinf(c)+cosf(a)*cosf(c);

-	rm.d[2*4 + 1] = cosf(b)*sinf(c);

-	rm.d[0*4 + 2] = cosf(a)*sinf(b)*cosf(c)+sinf(a)*sinf(c);

-	rm.d[1*4 + 2] = sinf(a)*sinf(b)*cosf(c)-cosf(a)*sinf(c);

-	rm.d[2*4 + 2] = cosf(b)*cosf(c);

-	//the other parts

-	rm.d[0*4 + 3] = 0;

-	rm.d[1*4 + 3] = 0;

-	rm.d[2*4 + 3] = 0;

-	rm.d[3*4 + 3] = 1; //the bottom right corner of the matrix.

-	rm.d[3*4 + 0] = 0;

-	rm.d[3*4 + 1] = 0;

-	rm.d[3*4 + 2] = 0;

-	return rm;

-}

-static inline f_ clampf( f_ a,  f_ min,  f_ max){

-	if(a<min) return min;

-	if(a>max) return max;

-	return a;

-}

-static inline f_ lengthv3( vec3 a){

-	return sqrtf(a.d[0] * a.d[0] + a.d[1] * a.d[1] + a.d[2] * a.d[2]);

-}

-static inline f_ lengthv4( vec4 a){

-	return sqrtf(a.d[0] * a.d[0] + a.d[1] * a.d[1] + a.d[2] * a.d[2] + a.d[3] * a.d[3]);

-}

-static inline vec3 multvec3( vec3 a,  vec3 b){

-	return (vec3){

-		.d[0]=a.d[0]*b.d[0],

-		.d[1]=a.d[1]*b.d[1],

-		.d[2]=a.d[2]*b.d[2]

-	};

-}

-static inline vec4 multvec4( vec4 a,  vec4 b){

-	return (vec4){

-		.d[0]=a.d[0]*b.d[0],

-		.d[1]=a.d[1]*b.d[1],

-		.d[2]=a.d[2]*b.d[2],

-		.d[3]=a.d[3]*b.d[3]

-	};

-}

-static inline vec3 clampvec3( vec3 a,  vec3 min,  vec3 max){

-	vec3 ret;

-	ret.d[0] = clampf(a.d[0],min.d[0],max.d[0]);

-	ret.d[1] = clampf(a.d[1],min.d[1],max.d[1]);

-	ret.d[2] = clampf(a.d[2],min.d[2],max.d[2]);

-	return ret;

-}

-static inline vec4 clampvec4( vec4 a,  vec4 min,  vec4 max){

-	vec4 ret;

-	ret.d[0] = clampf(a.d[0],min.d[0],max.d[0]);

-	ret.d[1] = clampf(a.d[1],min.d[1],max.d[1]);

-	ret.d[2] = clampf(a.d[2],min.d[2],max.d[2]);

-	ret.d[3] = clampf(a.d[3],min.d[3],max.d[3]);

-	return ret;

-}

-static inline f_ dotv3( vec3 a,  vec3 b){

-	return a.d[0] * b.d[0] + a.d[1] * b.d[1] + a.d[2] * b.d[2];

-}

-static inline f_ dotv4( vec4 a,  vec4 b){

-	return a.d[0] * b.d[0] + a.d[1] * b.d[1] + a.d[2] * b.d[2] + a.d[3] * b.d[3];

-}

-static inline mat4 multm4( mat4 a,  mat4 b){

-	mat4 ret;

-	for(int i = 0; i < 4; i++)

-	for(int j = 0; j < 4; j++)

-		ret.d[i*4 + j] = dotv4(

-			getrow(a, j),

-			getcol(b, i)

-		);

-	return ret;

-}

-static inline vec4 mat4xvec4( mat4 t,  vec4 v){

-	uint i = 0;

-	vec4 vr;

-	vr.d[0] = 	t.d[0*4+i] * v.d[0] +

-				t.d[1*4+i] * v.d[1] +

-				t.d[2*4+i] * v.d[2] +

-				t.d[3*4+i] * v.d[3];

-	i++;

-	vr.d[1] = 	t.d[0*4+i] * v.d[0] +

-				t.d[1*4+i] * v.d[1] +

-				t.d[2*4+i] * v.d[2] +

-				t.d[3*4+i] * v.d[3];

-	i++;

-	vr.d[2] = 	t.d[0*4+i] * v.d[0] +

-				t.d[1*4+i] * v.d[1] +

-				t.d[2*4+i] * v.d[2] +

-				t.d[3*4+i] * v.d[3];

-	i++;

-	vr.d[3] = 	t.d[0*4+i] * v.d[0] +

-				t.d[1*4+i] * v.d[1] +

-				t.d[2*4+i] * v.d[2] +

-				t.d[3*4+i] * v.d[3];

-	return vr;

-}

-static inline vec3 crossv3( vec3 a,  vec3 b){

-	vec3 retval;

-	retval.d[0] = a.d[1] * b.d[2] - a.d[2] * b.d[1];

-	retval.d[1] = a.d[2] * b.d[0] - a.d[0] * b.d[2];

-	retval.d[2] = a.d[0] * b.d[1] - a.d[1] * b.d[0];

-	return retval;

-}

-static inline vec3 scalev3( f_ s,  vec3 i){i.d[0] *= s; i.d[1] *= s; i.d[2] *= s; return i;}

-static inline vec4 scalev4( f_ s,  vec4 i){i.d[0] *= s; i.d[1] *= s; i.d[2] *= s;i.d[3] *= s; return i;}

-static inline vec3 normalizev3( vec3 a){

-  	if(lengthv3(a)==0) return (vec3){.d[0]=0.0,.d[1]=0.0,.d[2]=1.0};

-	return scalev3(1.0/lengthv3(a), a);

-}

-static inline vec4 normalizev4( vec4 a){

-  	if(lengthv4(a)==0) return (vec4){.d[0]=0.0,.d[1]=0.0,.d[2]=1.0,.d[3]=0.0};

-	return scalev4(1.0/lengthv4(a), a);

-}

-static inline vec3 addv3( vec3 aa,  vec3 b){

-	vec3 a = aa;

-	a.d[0] += b.d[0]; a.d[1] += b.d[1]; a.d[2] += b.d[2]; return a;

-}

-static inline vec3 rotatev3( vec3 in,  vec3 axis,  f_ ang){

-	vec3 t1 = scalev3(cosf(ang),in);

-	vec3 t2 = scalev3(sinf(ang),crossv3(axis,in));

-	vec3 t3 = scalev3((1-cosf(ang))*dotv3(axis,in),axis);

-	return addv3(t1,addv3(t2,t3));

-}

-static inline vec4 addv4( vec4 aa,  vec4 b){

-	vec4 a = aa;

-	a.d[0] += b.d[0]; a.d[1] += b.d[1]; a.d[2] += b.d[2]; a.d[3] += b.d[3]; return a;

-}

-static inline vec3 subv3( vec3 a,  vec3 b){

-	return addv3(a,scalev3(-1,b));

-}

-static inline mat4 identitymat4(){

-	return scalemat4(

-		(vec4){.d[0]=1.0,.d[1]=1.0,.d[2]=1.0,.d[3]=1.0}

-	);

-}

-static inline mat4 translate( vec3 t){

-	mat4 tm = identitymat4();

-	tm.d[3*4+0] = t.d[0];

-	tm.d[3*4+1] = t.d[1];

-	tm.d[3*4+2] = t.d[2];

-	return tm;

-}

-static inline vec4 subv4( vec4 a,  vec4 b){

-	return addv4(a,scalev4(-1,b));

-}

-static inline vec3 reflect( vec3 in,  vec3 norm){

-	return

-	addv3(in, //I +

-		scalev3(-2.0*dotv3(norm, in), //-2.0 * dotv3(norm,in) *

-			norm //N

-		)

-	);

-}

-static inline vec4 upv3( vec3 in,  f_ w){

-	return (vec4){

-		.d[0]=in.d[0],

-		.d[1]=in.d[1],

-		.d[2]=in.d[2],

-		.d[3]=w

-	};

-}

-static inline vec3 downv4( vec4 in){

-	return (vec3){

-		.d[0]=in.d[0],

-		.d[1]=in.d[1],

-		.d[2]=in.d[2]

-	};

-}

-static inline mat4 lookAt( vec3 eye,  vec3 at,  vec3 up){

-	mat4 cw = identitymat4();

-	vec3 zaxis = normalizev3(subv3(at,eye));

-	vec3 xaxis = normalizev3(crossv3(zaxis,up));

-	vec3 yaxis = crossv3(xaxis, zaxis);

-	zaxis = scalev3(-1,zaxis);

-	cw.d[0*4+0] = xaxis.d[0];

-	cw.d[1*4+0] = xaxis.d[1];

-	cw.d[2*4+0] = xaxis.d[2];

-	cw.d[3*4+0] = -dotv3(xaxis,eye);

-	cw.d[0*4+1] = yaxis.d[0];

-	cw.d[1*4+1] = yaxis.d[1];

-	cw.d[2*4+1] = yaxis.d[2];

-	cw.d[3*4+1] = -dotv3(yaxis,eye);

-	cw.d[0*4+2] = zaxis.d[0];

-	cw.d[1*4+2] = zaxis.d[1];

-	cw.d[2*4+2] = zaxis.d[2];

-	cw.d[3*4+2] = -dotv3(zaxis,eye);

-	cw.d[0*4+3] = 0;

-	cw.d[1*4+3] = 0;

-	cw.d[2*4+3] = 0;

-	cw.d[3*4+3] = 1;

-	return cw;

-}

-//Collision detection

-//These Algorithms return the penetration vector into

-//the shape in the first argument

-//With depth of penetration in element 4

-//if depth of penetration is zero or lower then there is no penetration.

-static inline vec4 spherevsphere( vec4 s1,  vec4 s2){ //x,y,z,radius

-	vec4 ret;

-	vec3 diff = subv3(

-				downv4(s2),

-				downv4(s1)

-			);

-	float lv3 = lengthv3(diff);

-	float l = (s1.d[3] + s2.d[3]-lv3);

-	if(l < 0 || lv3 == 0) {

-		ret.d[3] = 0;return ret;

-	}

-	ret = upv3(

-		scalev3(

-			l/lv3,diff

-		)

-		,l

-	);

-	return ret;

-}

-static inline vec4 boxvbox( aabb b1,  aabb b2){ //Just points along the minimum separating axis, Nothing fancy.

-	vec4 ret = (vec4){

-		.d[0]=0,

-		.d[1]=0,

-		.d[2]=0,

-		.d[3]=0

-	};

-	vec3 sumextents = addv3(b1.e,b2.e);

-	vec3 b1c = downv4(b1.c);

-	vec3 b2c = downv4(b2.c);

-	vec3 b1min = subv3(b1c,b1.e);

-	vec3 b2min = subv3(b2c,b2.e);

-	vec3 b1max = addv3(b1c,b1.e);

-	vec3 b2max = addv3(b2c,b2.e);

-	if(

-		!(

-			(fabs(b1c.d[0] - b2c.d[0]) <= sumextents.d[0]) &&

-			(fabs(b1c.d[1] - b2c.d[1]) <= sumextents.d[1]) &&

-			(fabs(b1c.d[2] - b2c.d[2]) <= sumextents.d[2])

-		)

-	){

-		return ret;

-	}

-	vec3 axispen[2];

-	axispen[0] = subv3(b1max,b2min);

-	axispen[1] = subv3(b1min,b2max);

-	ret.d[3] = axispen[0].d[0];

-	ret.d[0] = axispen[0].d[0];

-	for(int i = 1; i < 6; i++){

-		if(fabs(axispen[i/3].d[i%3]) < fabs(ret.d[3])){

-			ret = (vec4){

-						.d[0]=0,

-						.d[1]=0,

-						.d[2]=0,

-						.d[3]=(axispen[i/3].d[i%3])

-					};

-			ret.d[i%3] = ret.d[3];

-			ret.d[3] = fabs(ret.d[3]);

-		}

-	}

-	return ret;

-}

-static inline vec3 closestpointAABB( aabb b,  vec3 p){

-	vec3 b1min = subv3(downv4(b.c),b.e);

-	vec3 b1max = addv3(downv4(b.c),b.e);

-	return clampvec3(p,b1min,b1max);

-}

-static inline vec4 spherevaabb( vec4 sph,  aabb box){

-	vec4 ret;

-	vec3 p = closestpointAABB(box,downv4(sph));

-	vec3 v = subv3(p,downv4(sph));

-	f_ d2 = dotv3(v,v);

-	if(d2 <= sph.d[3] * sph.d[3]){

-		f_ len = lengthv3(v);

-		f_ diff = (sph.d[3] - len);

-		if(len > 0){

-			f_ factor = diff/len;

-			vec3 bruh = scalev3(factor, v);

-			ret = upv3(bruh, diff);

-			return ret;

-		} else {

-			aabb virt;

-			virt.c = sph;

-			virt.e.d[0] = sph.d[3];

-			virt.e.d[1] = sph.d[3];

-			virt.e.d[2] = sph.d[3];

-			return boxvbox(virt,box);

-		}

-	}

-	else

-		return (vec4){

-			.d[0]=0,

-			.d[1]=0,

-			.d[2]=0,

-			.d[3]=0

-		};

-}

-//end of chad math impl

-//END Math_Library.h~~~~~~~~~~~~~~~~~~~~

-#endif

--- a/Raw_Demos/include/stb_image_write.h

+++ /dev/null

@@ -1,1733 +1,0 @@

-/* stb_image_write - v1.14 - public domain - http://nothings.org/stb

-   writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015

-									 no warranty implied; use at your own risk

-   Before #including,

-	   #define STB_IMAGE_WRITE_IMPLEMENTATION

-   in the file that you want to have the implementation.

-   Will probably not work correctly with strict-aliasing optimizations.

-ABOUT:

-   This header file is a library for writing images to C stdio or a callback.

-   The PNG output is not optimal; it is 20-50% larger than the file

-   written by a decent optimizing implementation; though providing a custom

-   zlib compress function (see STBIW_ZLIB_COMPRESS) can mitigate that.

-   This library is designed for source code compactness and simplicity,

-   not optimal image file size or run-time performance.

-BUILDING:

-   You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h.

-   You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace

-   malloc,realloc,free.

-   You can #define STBIW_MEMMOVE() to replace memmove()

-   You can #define STBIW_ZLIB_COMPRESS to use a custom zlib-style compress

-function for PNG compression (instead of the builtin one), it must have the

-following signature: unsigned char * my_compress(unsigned char *data, int

-data_len, int *out_len, int quality); The returned data will be freed with

-STBIW_FREE() (free() by default), so it must be heap allocated with

-STBIW_MALLOC() (malloc() by default),

-UNICODE:

-   If compiling for Windows and you wish to use Unicode filenames, compile

-   with

-	   #define STBIW_WINDOWS_UTF8

-   and pass utf8-encoded filenames. Call stbiw_convert_wchar_to_utf8 to convert

-   Windows wchar_t filenames to utf8.

-USAGE:

-   There are five functions, one for each image file format:

-	 int stbi_write_png(char const *filename, int w, int h, int comp, const void

-*data, int stride_in_bytes); int stbi_write_bmp(char const *filename, int w, int

-h, int comp, const void *data); int stbi_write_tga(char const *filename, int w,

-int h, int comp, const void *data); int stbi_write_jpg(char const *filename, int

-w, int h, int comp, const void *data, int quality); int stbi_write_hdr(char

-const *filename, int w, int h, int comp, const float *data);

-	 void stbi_flip_vertically_on_write(int flag); // flag is non-zero to flip

-data vertically

-   There are also five equivalent functions that use an arbitrary write

-function. You are expected to open/close your file-equivalent before and after

-calling these:

-	 int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int

-h, int comp, const void  *data, int stride_in_bytes); int

-stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int

-comp, const void  *data); int stbi_write_tga_to_func(stbi_write_func *func, void

-*context, int w, int h, int comp, const void  *data); int

-stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int

-comp, const float *data); int stbi_write_jpg_to_func(stbi_write_func *func, void

-*context, int x, int y, int comp, const void *data, int quality);

-   where the callback is:

-	  void stbi_write_func(void *context, void *data, int size);

-   You can configure it with these global variables:

-	  int stbi_write_tga_with_rle;             // defaults to true; set to 0 to

-disable RLE int stbi_write_png_compression_level;    // defaults to 8; set to

-higher for more compression int stbi_write_force_png_filter;         // defaults

-to -1; set to 0..5 to force a filter mode

-   You can define STBI_WRITE_NO_STDIO to disable the file variant of these

-   functions, so the library will not use stdio.h at all. However, this will

-   also disable HDR writing, because it requires stdio for formatted output.

-   Each function returns 0 on failure and non-0 on success.

-   The functions create an image file defined by the parameters. The image

-   is a rectangle of pixels stored from left-to-right, top-to-bottom.

-   Each pixel contains 'comp' channels of data stored interleaved with 8-bits

-   per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is

-   monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall.

-   The *data pointer points to the first byte of the top-left-most pixel.

-   For PNG, "stride_in_bytes" is the distance in bytes from the first byte of

-   a row of pixels to the first byte of the next row of pixels.

-   PNG creates output files with the same number of components as the input.

-   The BMP format expands Y to RGB in the file format and does not

-   output alpha.

-   PNG supports writing rectangles of data even when the bytes storing rows of

-   data are not consecutive in memory (e.g. sub-rectangles of a larger image),

-   by supplying the stride between the beginning of adjacent rows. The other

-   formats do not. (Thus you cannot write a native-format BMP through the BMP

-   writer, both because it is in BGR order and because it may have padding

-   at the end of the line.)

-   PNG allows you to set the deflate compression level by setting the global

-   variable 'stbi_write_png_compression_level' (it defaults to 8).

-   HDR expects linear float data. Since the format is always 32-bit rgb(e)

-   data, alpha (if provided) is discarded, and for monochrome data it is

-   replicated across all three channels.

-   TGA supports RLE or non-RLE compressed data. To use non-RLE-compressed

-   data, set the global variable 'stbi_write_tga_with_rle' to 0.

-   JPEG does ignore alpha channels in input data; quality is between 1 and 100.

-   Higher quality looks better but results in a bigger image.

-   JPEG baseline (no JPEG progressive).

-CREDITS:

-   Sean Barrett           -    PNG/BMP/TGA

-   Baldur Karlsson        -    HDR

-   Jean-Sebastien Guay    -    TGA monochrome

-   Tim Kelsey             -    misc enhancements

-   Alan Hickman           -    TGA RLE

-   Emmanuel Julien        -    initial file IO callback implementation

-   Jon Olick              -    original jo_jpeg.cpp code

-   Daniel Gibson          -    integrate JPEG, allow external zlib

-   Aarni Koskela          -    allow choosing PNG filter

-   bugfixes:

-	  github:Chribba

-	  Guillaume Chereau

-	  github:jry2

-	  github:romigrou

-	  Sergio Gonzalez

-	  Jonas Karlsson

-	  Filip Wasil

-	  Thatcher Ulrich

-	  github:poppolopoppo

-	  Patrick Boettcher

-	  github:xeekworx

-	  Cap Petschulat

-	  Simon Rodriguez

-	  Ivan Tikhonov

-	  github:ignotion

-	  Adam Schackart

-LICENSE

-  See end of file for license information.

-*/

-#ifndef INCLUDE_STB_IMAGE_WRITE_H

-#define INCLUDE_STB_IMAGE_WRITE_H

-#include <stdlib.h>

-// if STB_IMAGE_WRITE_STATIC causes problems, try defining STBIWDEF to 'inline'

-// or 'static inline'

-#ifndef STBIWDEF

-#ifdef STB_IMAGE_WRITE_STATIC

-#define STBIWDEF static

-#else

-#ifdef __cplusplus

-#define STBIWDEF extern "C"

-#else

-#define STBIWDEF extern

-#endif

-#endif

-#endif

-#ifndef STB_IMAGE_WRITE_STATIC // C++ forbids static forward declarations

-extern int stbi_write_tga_with_rle;

-extern int stbi_write_png_compression_level;

-extern int stbi_write_force_png_filter;

-#endif

-#ifndef STBI_WRITE_NO_STDIO

-STBIWDEF int stbi_write_png(char const* filename, int w, int h, int comp, const void* data, int stride_in_bytes);

-STBIWDEF int stbi_write_bmp(char const* filename, int w, int h, int comp, const void* data);

-STBIWDEF int stbi_write_tga(char const* filename, int w, int h, int comp, const void* data);

-STBIWDEF int stbi_write_hdr(char const* filename, int w, int h, int comp, const float* data);

-STBIWDEF int stbi_write_jpg(char const* filename, int x, int y, int comp, const void* data, int quality);

-#ifdef STBI_WINDOWS_UTF8

-STBIWDEF int stbiw_convert_wchar_to_utf8(char* buffer, size_t bufferlen, const wchar_t* input);

-#endif

-#endif

-typedef void stbi_write_func(void* context, void* data, int size);

-STBIWDEF int stbi_write_png_to_func(stbi_write_func* func, void* context, int w, int h, int comp, const void* data, int stride_in_bytes);

-STBIWDEF int stbi_write_bmp_to_func(stbi_write_func* func, void* context, int w, int h, int comp, const void* data);

-STBIWDEF int stbi_write_tga_to_func(stbi_write_func* func, void* context, int w, int h, int comp, const void* data);

-STBIWDEF int stbi_write_hdr_to_func(stbi_write_func* func, void* context, int w, int h, int comp, const float* data);

-STBIWDEF int stbi_write_jpg_to_func(stbi_write_func* func, void* context, int x, int y, int comp, const void* data, int quality);

-STBIWDEF void stbi_flip_vertically_on_write(int flip_boolean);

-#endif // INCLUDE_STB_IMAGE_WRITE_H

-#ifdef STB_IMAGE_WRITE_IMPLEMENTATION

-#ifdef _WIN32

-#ifndef _CRT_SECURE_NO_WARNINGS

-#define _CRT_SECURE_NO_WARNINGS

-#endif

-#ifndef _CRT_NONSTDC_NO_DEPRECATE

-#define _CRT_NONSTDC_NO_DEPRECATE

-#endif

-#endif

-#ifndef STBI_WRITE_NO_STDIO

-#include <stdio.h>

-#endif // STBI_WRITE_NO_STDIO

-#include <math.h>

-#include <stdarg.h>

-#include <stdlib.h>

-#include <string.h>

-#if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || defined(STBIW_REALLOC_SIZED))

-// ok

-#elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) && !defined(STBIW_REALLOC_SIZED)

-// ok

-#else

-#error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC (or STBIW_REALLOC_SIZED)."

-#endif

-#ifndef STBIW_MALLOC

-#define STBIW_MALLOC(sz) malloc(sz)

-#define STBIW_REALLOC(p, newsz) realloc(p, newsz)

-#define STBIW_FREE(p) free(p)

-#endif

-#ifndef STBIW_REALLOC_SIZED

-#define STBIW_REALLOC_SIZED(p, oldsz, newsz) STBIW_REALLOC(p, newsz)

-#endif

-#ifndef STBIW_MEMMOVE

-#define STBIW_MEMMOVE(a, b, sz) memmove(a, b, sz)

-#endif

-#ifndef STBIW_ASSERT

-#include <assert.h>

-#define STBIW_ASSERT(x) assert(x)

-#endif

-#define STBIW_UCHAR(x) (unsigned char)((x)&0xff)

-#ifdef STB_IMAGE_WRITE_STATIC

-static int stbi_write_png_compression_level = 8;

-static int stbi_write_tga_with_rle = 1;

-static int stbi_write_force_png_filter = -1;

-#else

-int stbi_write_png_compression_level = 8;

-int stbi_write_tga_with_rle = 1;

-int stbi_write_force_png_filter = -1;

-#endif

-static int stbi__flip_vertically_on_write = 0;

-STBIWDEF void stbi_flip_vertically_on_write(int flag) { stbi__flip_vertically_on_write = flag; }

-typedef struct {

-	stbi_write_func* func;

-	void* context;

-} stbi__write_context;

-// initialize a callback-based context

-static void stbi__start_write_callbacks(stbi__write_context* s, stbi_write_func* c, void* context) {

-	s->func = c;

-	s->context = context;

-}

-#ifndef STBI_WRITE_NO_STDIO

-static void stbi__stdio_write(void* context, void* data, int size) { fwrite(data, 1, size, (FILE*)context); }

-#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)

-#ifdef __cplusplus

-#define STBIW_EXTERN extern "C"

-#else

-#define STBIW_EXTERN extern

-#endif

-STBIW_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char* str, int cbmb, wchar_t* widestr,

-																	 int cchwide);

-STBIW_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t* widestr, int cchwide, char* str,

-																	 int cbmb, const char* defchar, int* used_default);

-STBIWDEF int stbiw_convert_wchar_to_utf8(char* buffer, size_t bufferlen, const wchar_t* input) {

-	return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int)bufferlen, NULL, NULL);

-}

-#endif

-static FILE* stbiw__fopen(char const* filename, char const* mode) {

-	FILE* f;

-#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)

-	wchar_t wMode[64];

-	wchar_t wFilename[1024];

-	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)))

-		return 0;

-	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)))

-		return 0;

-#if _MSC_VER >= 1400

-	if (0 != _wfopen_s(&f, wFilename, wMode))

-		f = 0;

-#else

-	f = _wfopen(wFilename, wMode);

-#endif

-#elif defined(_MSC_VER) && _MSC_VER >= 1400

-	if (0 != fopen_s(&f, filename, mode))

-		f = 0;

-#else

-	f = fopen(filename, mode);

-#endif

-	return f;

-}

-static int stbi__start_write_file(stbi__write_context* s, const char* filename) {

-	FILE* f = stbiw__fopen(filename, "wb");

-	stbi__start_write_callbacks(s, stbi__stdio_write, (void*)f);

-	return f != NULL;

-}

-static void stbi__end_write_file(stbi__write_context* s) { fclose((FILE*)s->context); }

-#endif // !STBI_WRITE_NO_STDIO

-typedef unsigned int stbiw_uint32;

-typedef int stb_image_write_test[sizeof(stbiw_uint32) == 4 ? 1 : -1];

-static void stbiw__writefv(stbi__write_context* s, const char* fmt, va_list v) {

-	while (*fmt) {

-		switch (*fmt++) {

-		case ' ':

-			break;

-		case '1': {

-			unsigned char x = STBIW_UCHAR(va_arg(v, int));

-			s->func(s->context, &x, 1);

-			break;

-		}

-		case '2': {

-			int x = va_arg(v, int);

-			unsigned char b[2];

-			b[0] = STBIW_UCHAR(x);

-			b[1] = STBIW_UCHAR(x >> 8);

-			s->func(s->context, b, 2);

-			break;

-		}

-		case '4': {

-			stbiw_uint32 x = va_arg(v, int);

-			unsigned char b[4];

-			b[0] = STBIW_UCHAR(x);

-			b[1] = STBIW_UCHAR(x >> 8);

-			b[2] = STBIW_UCHAR(x >> 16);

-			b[3] = STBIW_UCHAR(x >> 24);

-			s->func(s->context, b, 4);

-			break;

-		}

-		default:

-			STBIW_ASSERT(0);

-			return;

-		}

-	}

-}

-static void stbiw__writef(stbi__write_context* s, const char* fmt, ...) {

-	va_list v;

-	va_start(v, fmt);

-	stbiw__writefv(s, fmt, v);

-	va_end(v);

-}

-static void stbiw__putc(stbi__write_context* s, unsigned char c) { s->func(s->context, &c, 1); }

-static void stbiw__write3(stbi__write_context* s, unsigned char a, unsigned char b, unsigned char c) {

-	unsigned char arr[3];

-	arr[0] = a;

-	arr[1] = b;

-	arr[2] = c;

-	s->func(s->context, arr, 3);

-}

-static void stbiw__write_pixel(stbi__write_context* s, int rgb_dir, int comp, int write_alpha, int expand_mono, unsigned char* d) {

-	unsigned char bg[3] = {255, 0, 255}, px[3];

-	int k;

-	if (write_alpha < 0)

-		s->func(s->context, &d[comp - 1], 1);

-	switch (comp) {

-	case 2: // 2 pixels = mono + alpha, alpha is written separately, so same as

-			// 1-channel case

-	case 1:

-		if (expand_mono)

-			stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp

-		else

-			s->func(s->context, d, 1); // monochrome TGA

-		break;

-	case 4:

-		if (!write_alpha) {

-			// composite against pink background

-			for (k = 0; k < 3; ++k)

-				px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255;

-			stbiw__write3(s, px[1 - rgb_dir], px[1], px[1 + rgb_dir]);

-			break;

-		}

-		/* FALLTHROUGH */

-	case 3:

-		stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]);

-		break;

-	}

-	if (write_alpha > 0)

-		s->func(s->context, &d[comp - 1], 1);

-}

-static void stbiw__write_pixels(stbi__write_context* s, int rgb_dir, int vdir, int x, int y, int comp, void* data, int write_alpha, int scanline_pad,

-								int expand_mono) {

-	stbiw_uint32 zero = 0;

-	int i, j, j_end;

-	if (y <= 0)

-		return;

-	if (stbi__flip_vertically_on_write)

-		vdir *= -1;

-	if (vdir < 0) {

-		j_end = -1;

-		j = y - 1;

-	} else {

-		j_end = y;

-		j = 0;

-	}

-	for (; j != j_end; j += vdir) {

-		for (i = 0; i < x; ++i) {

-			unsigned char* d = (unsigned char*)data + (j * x + i) * comp;

-			stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d);

-		}

-		s->func(s->context, &zero, scanline_pad);

-	}

-}

-static int stbiw__outfile(stbi__write_context* s, int rgb_dir, int vdir, int x, int y, int comp, int expand_mono, void* data, int alpha, int pad,

-						  const char* fmt, ...) {

-	if (y < 0 || x < 0) {

-		return 0;

-	} else {

-		va_list v;

-		va_start(v, fmt);

-		stbiw__writefv(s, fmt, v);

-		va_end(v);

-		stbiw__write_pixels(s, rgb_dir, vdir, x, y, comp, data, alpha, pad, expand_mono);

-		return 1;

-	}

-}

-static int stbi_write_bmp_core(stbi__write_context* s, int x, int y, int comp, const void* data) {

-	int pad = (-x * 3) & 3;

-	return stbiw__outfile(s, -1, -1, x, y, comp, 1, (void*)data, 0, pad,

-						  "11 4 22 4"

-						  "4 44 22 444444",

-						  'B', 'M', 14 + 40 + (x * 3 + pad) * y, 0, 0,

-						  14 + 40,							  // file header

-						  40, x, y, 1, 24, 0, 0, 0, 0, 0, 0); // bitmap header

-}

-STBIWDEF int stbi_write_bmp_to_func(stbi_write_func* func, void* context, int x, int y, int comp, const void* data) {

-	stbi__write_context s;

-	stbi__start_write_callbacks(&s, func, context);

-	return stbi_write_bmp_core(&s, x, y, comp, data);

-}

-#ifndef STBI_WRITE_NO_STDIO

-STBIWDEF int stbi_write_bmp(char const* filename, int x, int y, int comp, const void* data) {

-	stbi__write_context s;

-	if (stbi__start_write_file(&s, filename)) {

-		int r = stbi_write_bmp_core(&s, x, y, comp, data);

-		stbi__end_write_file(&s);

-		return r;

-	} else

-		return 0;

-}

-#endif //! STBI_WRITE_NO_STDIO

-static int stbi_write_tga_core(stbi__write_context* s, int x, int y, int comp, void* data) {

-	int has_alpha = (comp == 2 || comp == 4);

-	int colorbytes = has_alpha ? comp - 1 : comp;

-	int format = colorbytes < 2 ? 3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3

-	if (y < 0 || x < 0)

-		return 0;

-	if (!stbi_write_tga_with_rle) {

-		return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void*)data, has_alpha, 0, "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y,

-							  (colorbytes + has_alpha) * 8, has_alpha * 8);

-	} else {

-		int i, j, k;

-		int jend, jdir;

-		stbiw__writef(s, "111 221 2222 11", 0, 0, format + 8, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8);

-		if (stbi__flip_vertically_on_write) {

-			j = 0;

-			jend = y;

-			jdir = 1;

-		} else {

-			j = y - 1;

-			jend = -1;

-			jdir = -1;

-		}

-		for (; j != jend; j += jdir) {

-			unsigned char* row = (unsigned char*)data + j * x * comp;

-			int len;

-			for (i = 0; i < x; i += len) {

-				unsigned char* begin = row + i * comp;

-				int diff = 1;

-				len = 1;

-				if (i < x - 1) {

-					++len;

-					diff = memcmp(begin, row + (i + 1) * comp, comp);

-					if (diff) {

-						const unsigned char* prev = begin;

-						for (k = i + 2; k < x && len < 128; ++k) {

-							if (memcmp(prev, row + k * comp, comp)) {

-								prev += comp;

-								++len;

-							} else {

-								--len;

-								break;

-							}

-						}

-					} else {

-						for (k = i + 2; k < x && len < 128; ++k) {

-							if (!memcmp(begin, row + k * comp, comp)) {

-								++len;

-							} else {

-								break;

-							}

-						}

-					}

-				}

-				if (diff) {

-					unsigned char header = STBIW_UCHAR(len - 1);

-					s->func(s->context, &header, 1);

-					for (k = 0; k < len; ++k) {

-						stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp);

-					}

-				} else {

-					unsigned char header = STBIW_UCHAR(len - 129);

-					s->func(s->context, &header, 1);

-					stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin);

-				}

-			}

-		}

-	}

-	return 1;

-}

-STBIWDEF int stbi_write_tga_to_func(stbi_write_func* func, void* context, int x, int y, int comp, const void* data) {

-	stbi__write_context s;

-	stbi__start_write_callbacks(&s, func, context);

-	return stbi_write_tga_core(&s, x, y, comp, (void*)data);

-}

-#ifndef STBI_WRITE_NO_STDIO

-STBIWDEF int stbi_write_tga(char const* filename, int x, int y, int comp, const void* data) {

-	stbi__write_context s;

-	if (stbi__start_write_file(&s, filename)) {

-		int r = stbi_write_tga_core(&s, x, y, comp, (void*)data);

-		stbi__end_write_file(&s);

-		return r;

-	} else

-		return 0;

-}

-#endif

-// *************************************************************************************************

-// Radiance RGBE HDR writer

-// by Baldur Karlsson

-#define stbiw__max(a, b) ((a) > (b) ? (a) : (b))

-static void stbiw__linear_to_rgbe(unsigned char* rgbe, float* linear) {

-	int exponent;

-	float maxcomp = stbiw__max(linear[0], stbiw__max(linear[1], linear[2]));

-	if (maxcomp < 1e-32f) {

-		rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0;

-	} else {

-		float normalize = (float)frexp(maxcomp, &exponent) * 256.0f / maxcomp;

-		rgbe[0] = (unsigned char)(linear[0] * normalize);

-		rgbe[1] = (unsigned char)(linear[1] * normalize);

-		rgbe[2] = (unsigned char)(linear[2] * normalize);

-		rgbe[3] = (unsigned char)(exponent + 128);

-	}

-}

-static void stbiw__write_run_data(stbi__write_context* s, int length, unsigned char databyte) {

-	unsigned char lengthbyte = STBIW_UCHAR(length + 128);

-	STBIW_ASSERT(length + 128 <= 255);

-	s->func(s->context, &lengthbyte, 1);

-	s->func(s->context, &databyte, 1);

-}

-static void stbiw__write_dump_data(stbi__write_context* s, int length, unsigned char* data) {

-	unsigned char lengthbyte = STBIW_UCHAR(length);

-	STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code

-	s->func(s->context, &lengthbyte, 1);

-	s->func(s->context, data, length);

-}

-static void stbiw__write_hdr_scanline(stbi__write_context* s, int width, int ncomp, unsigned char* scratch, float* scanline) {

-	unsigned char scanlineheader[4] = {2, 2, 0, 0};

-	unsigned char rgbe[4];

-	float linear[3];

-	int x;

-	scanlineheader[2] = (width & 0xff00) >> 8;

-	scanlineheader[3] = (width & 0x00ff);

-	/* skip RLE for images too small or large */

-	if (width < 8 || width >= 32768) {

-		for (x = 0; x < width; x++) {

-			switch (ncomp) {

-			case 4: /* fallthrough */

-			case 3:

-				linear[2] = scanline[x * ncomp + 2];

-				linear[1] = scanline[x * ncomp + 1];

-				linear[0] = scanline[x * ncomp + 0];

-				break;

-			default:

-				linear[0] = linear[1] = linear[2] = scanline[x * ncomp + 0];

-				break;

-			}

-			stbiw__linear_to_rgbe(rgbe, linear);

-			s->func(s->context, rgbe, 4);

-		}

-	} else {

-		int c, r;

-		/* encode into scratch buffer */

-		for (x = 0; x < width; x++) {

-			switch (ncomp) {

-			case 4: /* fallthrough */

-			case 3:

-				linear[2] = scanline[x * ncomp + 2];

-				linear[1] = scanline[x * ncomp + 1];

-				linear[0] = scanline[x * ncomp + 0];

-				break;

-			default:

-				linear[0] = linear[1] = linear[2] = scanline[x * ncomp + 0];

-				break;

-			}

-			stbiw__linear_to_rgbe(rgbe, linear);

-			scratch[x + width * 0] = rgbe[0];

-			scratch[x + width * 1] = rgbe[1];

-			scratch[x + width * 2] = rgbe[2];

-			scratch[x + width * 3] = rgbe[3];

-		}

-		s->func(s->context, scanlineheader, 4);

-		/* RLE each component separately */

-		for (c = 0; c < 4; c++) {

-			unsigned char* comp = &scratch[width * c];

-			x = 0;

-			while (x < width) {

-				// find first run

-				r = x;

-				while (r + 2 < width) {

-					if (comp[r] == comp[r + 1] && comp[r] == comp[r + 2])

-						break;

-					++r;

-				}

-				if (r + 2 >= width)

-					r = width;

-				// dump up to first run

-				while (x < r) {

-					int len = r - x;

-					if (len > 128)

-						len = 128;

-					stbiw__write_dump_data(s, len, &comp[x]);

-					x += len;

-				}

-				// if there's a run, output it

-				if (r + 2 < width) { // same test as what we break out of in

-									 // search loop, so only true if we break'd

-					// find next byte after run

-					while (r < width && comp[r] == comp[x])

-						++r;

-					// output run up to r

-					while (x < r) {

-						int len = r - x;

-						if (len > 127)

-							len = 127;

-						stbiw__write_run_data(s, len, comp[x]);

-						x += len;

-					}

-				}

-			}

-		}

-	}

-}

-static int stbi_write_hdr_core(stbi__write_context* s, int x, int y, int comp, float* data) {

-	if (y <= 0 || x <= 0 || data == NULL)

-		return 0;

-	else {

-		// Each component is stored separately. Allocate scratch space for full

-		// output scanline.

-		unsigned char* scratch = (unsigned char*)STBIW_MALLOC(x * 4);

-		int i, len;

-		char buffer[128];

-		char header[] = "#?RADIANCE\n# Written by "

-						"stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n";

-		s->func(s->context, header, sizeof(header) - 1);

-#ifdef __STDC_WANT_SECURE_LIB__

-		len = sprintf_s(buffer, sizeof(buffer), "EXPOSURE=          1.0000000000000\n\n-Y %d +X %d\n", y, x);

-#else

-		len = sprintf(buffer, "EXPOSURE=          1.0000000000000\n\n-Y %d +X %d\n", y, x);

-#endif

-		s->func(s->context, buffer, len);

-		for (i = 0; i < y; i++)

-			stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp * x * (stbi__flip_vertically_on_write ? y - 1 - i : i));

-		STBIW_FREE(scratch);

-		return 1;

-	}

-}

-STBIWDEF int stbi_write_hdr_to_func(stbi_write_func* func, void* context, int x, int y, int comp, const float* data) {

-	stbi__write_context s;

-	stbi__start_write_callbacks(&s, func, context);

-	return stbi_write_hdr_core(&s, x, y, comp, (float*)data);

-}

-#ifndef STBI_WRITE_NO_STDIO

-STBIWDEF int stbi_write_hdr(char const* filename, int x, int y, int comp, const float* data) {

-	stbi__write_context s;

-	if (stbi__start_write_file(&s, filename)) {

-		int r = stbi_write_hdr_core(&s, x, y, comp, (float*)data);

-		stbi__end_write_file(&s);

-		return r;

-	} else

-		return 0;

-}

-#endif // STBI_WRITE_NO_STDIO

-//////////////////////////////////////////////////////////////////////////////

-//

-// PNG writer

-//

-#ifndef STBIW_ZLIB_COMPRESS

-// stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount()

-// == vector<>::size()

-#define stbiw__sbraw(a) ((int*)(void*)(a)-2)

-#define stbiw__sbm(a) stbiw__sbraw(a)[0]

-#define stbiw__sbn(a) stbiw__sbraw(a)[1]

-#define stbiw__sbneedgrow(a, n) ((a) == 0 || stbiw__sbn(a) + n >= stbiw__sbm(a))

-#define stbiw__sbmaybegrow(a, n) (stbiw__sbneedgrow(a, (n)) ? stbiw__sbgrow(a, n) : 0)

-#define stbiw__sbgrow(a, n) stbiw__sbgrowf((void**)&(a), (n), sizeof(*(a)))

-#define stbiw__sbpush(a, v) (stbiw__sbmaybegrow(a, 1), (a)[stbiw__sbn(a)++] = (v))

-#define stbiw__sbcount(a) ((a) ? stbiw__sbn(a) : 0)

-#define stbiw__sbfree(a) ((a) ? STBIW_FREE(stbiw__sbraw(a)), 0 : 0)

-static void* stbiw__sbgrowf(void** arr, int increment, int itemsize) {

-	int m = *arr ? 2 * stbiw__sbm(*arr) + increment : increment + 1;

-	void* p = STBIW_REALLOC_SIZED(*arr ? stbiw__sbraw(*arr) : 0, *arr ? (stbiw__sbm(*arr) * itemsize + sizeof(int) * 2) : 0, itemsize * m + sizeof(int) * 2);

-	STBIW_ASSERT(p);

-	if (p) {

-		if (!*arr)

-			((int*)p)[1] = 0;

-		*arr = (void*)((int*)p + 2);

-		stbiw__sbm(*arr) = m;

-	}

-	return *arr;

-}

-static unsigned char* stbiw__zlib_flushf(unsigned char* data, unsigned int* bitbuffer, int* bitcount) {

-	while (*bitcount >= 8) {

-		stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer));

-		*bitbuffer >>= 8;

-		*bitcount -= 8;

-	}

-	return data;

-}

-static int stbiw__zlib_bitrev(int code, int codebits) {

-	int res = 0;

-	while (codebits--) {

-		res = (res << 1) | (code & 1);

-		code >>= 1;

-	}

-	return res;

-}

-static unsigned int stbiw__zlib_countm(unsigned char* a, unsigned char* b, int limit) {

-	int i;

-	for (i = 0; i < limit && i < 258; ++i)

-		if (a[i] != b[i])

-			break;

-	return i;

-}

-static unsigned int stbiw__zhash(unsigned char* data) {

-	stbiw_uint32 hash = data[0] + (data[1] << 8) + (data[2] << 16);

-	hash ^= hash << 3;

-	hash += hash >> 5;

-	hash ^= hash << 4;

-	hash += hash >> 17;

-	hash ^= hash << 25;

-	hash += hash >> 6;

-	return hash;

-}

-#define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount))

-#define stbiw__zlib_add(code, codebits) (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush())

-#define stbiw__zlib_huffa(b, c) stbiw__zlib_add(stbiw__zlib_bitrev(b, c), c)

-// default huffman tables

-#define stbiw__zlib_huff1(n) stbiw__zlib_huffa(0x30 + (n), 8)

-#define stbiw__zlib_huff2(n) stbiw__zlib_huffa(0x190 + (n)-144, 9)

-#define stbiw__zlib_huff3(n) stbiw__zlib_huffa(0 + (n)-256, 7)

-#define stbiw__zlib_huff4(n) stbiw__zlib_huffa(0xc0 + (n)-280, 8)

-#define stbiw__zlib_huff(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n))

-#define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n))

-#define stbiw__ZHASH 16384

-#endif // STBIW_ZLIB_COMPRESS

-STBIWDEF unsigned char* stbi_zlib_compress(unsigned char* data, int data_len, int* out_len, int quality) {

-#ifdef STBIW_ZLIB_COMPRESS

-	// user provided a zlib compress implementation, use that

-	return STBIW_ZLIB_COMPRESS(data, data_len, out_len, quality);

-#else  // use builtin

-	static unsigned short lengthc[] = {3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 259};

-	static unsigned char lengtheb[] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0};

-	static unsigned short distc[] = {1,   2,   3,   4,   5,	7,	9,	13,   17,   25,   33,   49,	65,	97,	129,  193,

-									 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 32768};

-	static unsigned char disteb[] = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13};

-	unsigned int bitbuf = 0;

-	int i, j, bitcount = 0;

-	unsigned char* out = NULL;

-	unsigned char*** hash_table = (unsigned char***)STBIW_MALLOC(stbiw__ZHASH * sizeof(unsigned char**));

-	if (hash_table == NULL)

-		return NULL;

-	if (quality < 5)

-		quality = 5;

-	stbiw__sbpush(out, 0x78); // DEFLATE 32K window

-	stbiw__sbpush(out, 0x5e); // FLEVEL = 1

-	stbiw__zlib_add(1, 1);	// BFINAL = 1

-	stbiw__zlib_add(1, 2);	// BTYPE = 1 -- fixed huffman

-	for (i = 0; i < stbiw__ZHASH; ++i)

-		hash_table[i] = NULL;

-	i = 0;

-	while (i < data_len - 3) {

-		// hash next 3 bytes of data to be compressed

-		int h = stbiw__zhash(data + i) & (stbiw__ZHASH - 1), best = 3;

-		unsigned char* bestloc = 0;

-		unsigned char** hlist = hash_table[h];

-		int n = stbiw__sbcount(hlist);

-		for (j = 0; j < n; ++j) {

-			if (hlist[j] - data > i - 32768) { // if entry lies within window

-				int d = stbiw__zlib_countm(hlist[j], data + i, data_len - i);

-				if (d >= best) {

-					best = d;

-					bestloc = hlist[j];

-				}

-			}

-		}

-		// when hash table entry is too long, delete half the entries

-		if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2 * quality) {

-			STBIW_MEMMOVE(hash_table[h], hash_table[h] + quality, sizeof(hash_table[h][0]) * quality);

-			stbiw__sbn(hash_table[h]) = quality;

-		}

-		stbiw__sbpush(hash_table[h], data + i);

-		if (bestloc) {

-			// "lazy matching" - check match at *next* byte, and if it's better,

-			// do cur byte as literal

-			h = stbiw__zhash(data + i + 1) & (stbiw__ZHASH - 1);

-			hlist = hash_table[h];

-			n = stbiw__sbcount(hlist);

-			for (j = 0; j < n; ++j) {

-				if (hlist[j] - data > i - 32767) {

-					int e = stbiw__zlib_countm(hlist[j], data + i + 1, data_len - i - 1);

-					if (e > best) { // if next match is better, bail on current

-									// match

-						bestloc = NULL;

-						break;

-					}

-				}

-			}

-		}

-		if (bestloc) {

-			int d = (int)(data + i - bestloc); // distance back

-			STBIW_ASSERT(d <= 32767 && best <= 258);

-			for (j = 0; best > lengthc[j + 1] - 1; ++j)

-				;

-			stbiw__zlib_huff(j + 257);

-			if (lengtheb[j])

-				stbiw__zlib_add(best - lengthc[j], lengtheb[j]);

-			for (j = 0; d > distc[j + 1] - 1; ++j)

-				;

-			stbiw__zlib_add(stbiw__zlib_bitrev(j, 5), 5);

-			if (disteb[j])

-				stbiw__zlib_add(d - distc[j], disteb[j]);

-			i += best;

-		} else {

-			stbiw__zlib_huffb(data[i]);

-			++i;

-		}

-	}

-	// write out final bytes

-	for (; i < data_len; ++i)

-		stbiw__zlib_huffb(data[i]);

-	stbiw__zlib_huff(256); // end of block

-	// pad with 0 bits to byte boundary

-	while (bitcount)

-		stbiw__zlib_add(0, 1);

-	for (i = 0; i < stbiw__ZHASH; ++i)

-		(void)stbiw__sbfree(hash_table[i]);

-	STBIW_FREE(hash_table);

-	{

-		// compute adler32 on input

-		unsigned int s1 = 1, s2 = 0;

-		int blocklen = (int)(data_len % 5552);

-		j = 0;

-		while (j < data_len) {

-			for (i = 0; i < blocklen; ++i) {

-				s1 += data[j + i];

-				s2 += s1;

-			}

-			s1 %= 65521;

-			s2 %= 65521;

-			j += blocklen;

-			blocklen = 5552;

-		}

-		stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8));

-		stbiw__sbpush(out, STBIW_UCHAR(s2));

-		stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8));

-		stbiw__sbpush(out, STBIW_UCHAR(s1));

-	}

-	*out_len = stbiw__sbn(out);

-	// make returned pointer freeable

-	STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len);

-	return (unsigned char*)stbiw__sbraw(out);

-#endif // STBIW_ZLIB_COMPRESS

-}

-static unsigned int stbiw__crc32(unsigned char* buffer, int len) {

-#ifdef STBIW_CRC32

-	return STBIW_CRC32(buffer, len);

-#else

-	static unsigned int crc_table[256] = {

-		0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, 0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988,

-		0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,

-		0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172,

-		0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,

-		0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924,

-		0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,

-		0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E,

-		0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,

-		0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0,

-		0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,

-		0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A,

-		0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,

-		0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC,

-		0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,

-		0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236,

-		0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,

-		0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38,

-		0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,

-		0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2,

-		0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,

-		0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94,

-		0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D};

-	unsigned int crc = ~0u;

-	int i;

-	for (i = 0; i < len; ++i)

-		crc = (crc >> 8) ^ crc_table[buffer[i] ^ (crc & 0xff)];

-	return ~crc;

-#endif

-}

-#define stbiw__wpng4(o, a, b, c, d) ((o)[0] = STBIW_UCHAR(a), (o)[1] = STBIW_UCHAR(b), (o)[2] = STBIW_UCHAR(c), (o)[3] = STBIW_UCHAR(d), (o) += 4)

-#define stbiw__wp32(data, v) stbiw__wpng4(data, (v) >> 24, (v) >> 16, (v) >> 8, (v));

-#define stbiw__wptag(data, s) stbiw__wpng4(data, s[0], s[1], s[2], s[3])

-static void stbiw__wpcrc(unsigned char** data, int len) {

-	unsigned int crc = stbiw__crc32(*data - len - 4, len + 4);

-	stbiw__wp32(*data, crc);

-}

-static unsigned char stbiw__paeth(int a, int b, int c) {

-	int p = a + b - c, pa = abs(p - a), pb = abs(p - b), pc = abs(p - c);

-	if (pa <= pb && pa <= pc)

-		return STBIW_UCHAR(a);

-	if (pb <= pc)

-		return STBIW_UCHAR(b);

-	return STBIW_UCHAR(c);

-}

-// @OPTIMIZE: provide an option that always forces left-predict or paeth predict

-static void stbiw__encode_png_line(unsigned char* pixels, int stride_bytes, int width, int height, int y, int n, int filter_type, signed char* line_buffer) {

-	static int mapping[] = {0, 1, 2, 3, 4};

-	static int firstmap[] = {0, 1, 0, 5, 6};

-	int* mymap = (y != 0) ? mapping : firstmap;

-	int i;

-	int type = mymap[filter_type];

-	unsigned char* z = pixels + stride_bytes * (stbi__flip_vertically_on_write ? height - 1 - y : y);

-	int signed_stride = stbi__flip_vertically_on_write ? -stride_bytes : stride_bytes;

-	if (type == 0) {

-		memcpy(line_buffer, z, width * n);

-		return;

-	}

-	// first loop isn't optimized since it's just one pixel

-	for (i = 0; i < n; ++i) {

-		switch (type) {

-		case 1:

-			line_buffer[i] = z[i];

-			break;

-		case 2:

-			line_buffer[i] = z[i] - z[i - signed_stride];

-			break;

-		case 3:

-			line_buffer[i] = z[i] - (z[i - signed_stride] >> 1);

-			break;

-		case 4:

-			line_buffer[i] = (signed char)(z[i] - stbiw__paeth(0, z[i - signed_stride], 0));

-			break;

-		case 5:

-			line_buffer[i] = z[i];

-			break;

-		case 6:

-			line_buffer[i] = z[i];

-			break;

-		}

-	}

-	switch (type) {

-	case 1:

-		for (i = n; i < width * n; ++i)

-			line_buffer[i] = z[i] - z[i - n];

-		break;

-	case 2:

-		for (i = n; i < width * n; ++i)

-			line_buffer[i] = z[i] - z[i - signed_stride];

-		break;

-	case 3:

-		for (i = n; i < width * n; ++i)

-			line_buffer[i] = z[i] - ((z[i - n] + z[i - signed_stride]) >> 1);

-		break;

-	case 4:

-		for (i = n; i < width * n; ++i)

-			line_buffer[i] = z[i] - stbiw__paeth(z[i - n], z[i - signed_stride], z[i - signed_stride - n]);

-		break;

-	case 5:

-		for (i = n; i < width * n; ++i)

-			line_buffer[i] = z[i] - (z[i - n] >> 1);

-		break;

-	case 6:

-		for (i = n; i < width * n; ++i)

-			line_buffer[i] = z[i] - stbiw__paeth(z[i - n], 0, 0);

-		break;

-	}

-}

-STBIWDEF unsigned char* stbi_write_png_to_mem(const unsigned char* pixels, int stride_bytes, int x, int y, int n, int* out_len) {

-	int force_filter = stbi_write_force_png_filter;

-	int ctype[5] = {-1, 0, 4, 2, 6};

-	unsigned char sig[8] = {137, 80, 78, 71, 13, 10, 26, 10};

-	unsigned char *out, *o, *filt, *zlib;

-	signed char* line_buffer;

-	int j, zlen;

-	if (stride_bytes == 0)

-		stride_bytes = x * n;

-	if (force_filter >= 5) {

-		force_filter = -1;

-	}

-	filt = (unsigned char*)STBIW_MALLOC((x * n + 1) * y);

-	if (!filt)

-		return 0;

-	line_buffer = (signed char*)STBIW_MALLOC(x * n);

-	if (!line_buffer) {

-		STBIW_FREE(filt);

-		return 0;

-	}

-	for (j = 0; j < y; ++j) {

-		int filter_type;

-		if (force_filter > -1) {

-			filter_type = force_filter;

-			stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, force_filter, line_buffer);

-		} else { // Estimate the best filter by running through all of them:

-			int best_filter = 0, best_filter_val = 0x7fffffff, est, i;

-			for (filter_type = 0; filter_type < 5; filter_type++) {

-				stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, filter_type, line_buffer);

-				// Estimate the entropy of the line using this filter; the less,

-				// the better.

-				est = 0;

-				for (i = 0; i < x * n; ++i) {

-					est += abs((signed char)line_buffer[i]);

-				}

-				if (est < best_filter_val) {

-					best_filter_val = est;

-					best_filter = filter_type;

-				}

-			}

-			if (filter_type != best_filter) { // If the last iteration already got us

-											  // the best filter, don't redo it

-				stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, best_filter, line_buffer);

-				filter_type = best_filter;

-			}

-		}

-		// when we get here, filter_type contains the filter type, and

-		// line_buffer contains the data

-		filt[j * (x * n + 1)] = (unsigned char)filter_type;

-		STBIW_MEMMOVE(filt + j * (x * n + 1) + 1, line_buffer, x * n);

-	}

-	STBIW_FREE(line_buffer);

-	zlib = stbi_zlib_compress(filt, y * (x * n + 1), &zlen, stbi_write_png_compression_level);

-	STBIW_FREE(filt);

-	if (!zlib)

-		return 0;

-	// each tag requires 12 bytes of overhead

-	out = (unsigned char*)STBIW_MALLOC(8 + 12 + 13 + 12 + zlen + 12);

-	if (!out)

-		return 0;

-	*out_len = 8 + 12 + 13 + 12 + zlen + 12;

-	o = out;

-	STBIW_MEMMOVE(o, sig, 8);

-	o += 8;

-	stbiw__wp32(o, 13); // header length

-	stbiw__wptag(o, "IHDR");

-	stbiw__wp32(o, x);

-	stbiw__wp32(o, y);

-	*o++ = 8;

-	*o++ = STBIW_UCHAR(ctype[n]);

-	*o++ = 0;

-	*o++ = 0;

-	*o++ = 0;

-	stbiw__wpcrc(&o, 13);

-	stbiw__wp32(o, zlen);

-	stbiw__wptag(o, "IDAT");

-	STBIW_MEMMOVE(o, zlib, zlen);

-	o += zlen;

-	STBIW_FREE(zlib);

-	stbiw__wpcrc(&o, zlen);

-	stbiw__wp32(o, 0);

-	stbiw__wptag(o, "IEND");

-	stbiw__wpcrc(&o, 0);

-	STBIW_ASSERT(o == out + *out_len);

-	return out;

-}

-#ifndef STBI_WRITE_NO_STDIO

-STBIWDEF int stbi_write_png(char const* filename, int x, int y, int comp, const void* data, int stride_bytes) {

-	FILE* f;

-	int len;

-	unsigned char* png = stbi_write_png_to_mem((const unsigned char*)data, stride_bytes, x, y, comp, &len);

-	if (png == NULL)

-		return 0;

-	f = stbiw__fopen(filename, "wb");

-	if (!f) {

-		STBIW_FREE(png);

-		return 0;

-	}

-	fwrite(png, 1, len, f);

-	fclose(f);

-	STBIW_FREE(png);

-	return 1;

-}

-#endif

-STBIWDEF int stbi_write_png_to_func(stbi_write_func* func, void* context, int x, int y, int comp, const void* data, int stride_bytes) {

-	int len;

-	unsigned char* png = stbi_write_png_to_mem((const unsigned char*)data, stride_bytes, x, y, comp, &len);

-	if (png == NULL)

-		return 0;

-	func(context, png, len);

-	STBIW_FREE(png);

-	return 1;

-}

-/* ***************************************************************************

- *

- * JPEG writer

- *

- * This is based on Jon Olick's jo_jpeg.cpp:

- * public domain Simple, Minimalistic JPEG writer -

- * http://www.jonolick.com/code.html

- */

-static const unsigned char stbiw__jpg_ZigZag[] = {0,  1,  5,  6,  14, 15, 27, 28, 2,  4,  7,  13, 16, 26, 29, 42, 3,  8,  12, 17, 25, 30,

-												  41, 43, 9,  11, 18, 24, 31, 40, 44, 53, 10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38,

-												  46, 51, 55, 60, 21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63};

-static void stbiw__jpg_writeBits(stbi__write_context* s, int* bitBufP, int* bitCntP, const unsigned short* bs) {

-	int bitBuf = *bitBufP, bitCnt = *bitCntP;

-	bitCnt += bs[1];

-	bitBuf |= bs[0] << (24 - bitCnt);

-	while (bitCnt >= 8) {

-		unsigned char c = (bitBuf >> 16) & 255;

-		stbiw__putc(s, c);

-		if (c == 255) {

-			stbiw__putc(s, 0);

-		}

-		bitBuf <<= 8;

-		bitCnt -= 8;

-	}

-	*bitBufP = bitBuf;

-	*bitCntP = bitCnt;

-}

-static void stbiw__jpg_DCT(float* d0p, float* d1p, float* d2p, float* d3p, float* d4p, float* d5p, float* d6p, float* d7p) {

-	float d0 = *d0p, d1 = *d1p, d2 = *d2p, d3 = *d3p, d4 = *d4p, d5 = *d5p, d6 = *d6p, d7 = *d7p;

-	float z1, z2, z3, z4, z5, z11, z13;

-	float tmp0 = d0 + d7;

-	float tmp7 = d0 - d7;

-	float tmp1 = d1 + d6;

-	float tmp6 = d1 - d6;

-	float tmp2 = d2 + d5;

-	float tmp5 = d2 - d5;

-	float tmp3 = d3 + d4;

-	float tmp4 = d3 - d4;

-	// Even part

-	float tmp10 = tmp0 + tmp3; // phase 2

-	float tmp13 = tmp0 - tmp3;

-	float tmp11 = tmp1 + tmp2;

-	float tmp12 = tmp1 - tmp2;

-	d0 = tmp10 + tmp11; // phase 3

-	d4 = tmp10 - tmp11;

-	z1 = (tmp12 + tmp13) * 0.707106781f; // c4

-	d2 = tmp13 + z1;					 // phase 5

-	d6 = tmp13 - z1;

-	// Odd part

-	tmp10 = tmp4 + tmp5; // phase 2

-	tmp11 = tmp5 + tmp6;

-	tmp12 = tmp6 + tmp7;

-	// The rotator is modified from fig 4-8 to avoid extra negations.

-	z5 = (tmp10 - tmp12) * 0.382683433f; // c6

-	z2 = tmp10 * 0.541196100f + z5;		 // c2-c6

-	z4 = tmp12 * 1.306562965f + z5;		 // c2+c6

-	z3 = tmp11 * 0.707106781f;			 // c4

-	z11 = tmp7 + z3; // phase 5

-	z13 = tmp7 - z3;

-	*d5p = z13 + z2; // phase 6

-	*d3p = z13 - z2;

-	*d1p = z11 + z4;

-	*d7p = z11 - z4;

-	*d0p = d0;

-	*d2p = d2;

-	*d4p = d4;

-	*d6p = d6;

-}

-static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) {

-	int tmp1 = val < 0 ? -val : val;

-	val = val < 0 ? val - 1 : val;

-	bits[1] = 1;

-	while (tmp1 >>= 1) {

-		++bits[1];

-	}

-	bits[0] = val & ((1 << bits[1]) - 1);

-}

-// Transforms, quantizes and entropy-codes one 8x8 data unit.
-// CDU holds the samples (row pitch du_stride) and is overwritten by the DCT;
-// fdtbl holds the 64 per-coefficient scale factors; DC is the previous unit's
-// DC value. Bits are emitted through stbiw__jpg_writeBits using the HTDC/HTAC
-// code tables. Returns this unit's quantized DC coefficient.
-static int stbiw__jpg_processDU(stbi__write_context* s, int* bitBuf, int* bitCnt, float* CDU, int du_stride, float* fdtbl, int DC,
-								const unsigned short HTDC[256][2], const unsigned short HTAC[256][2]) {
-	const unsigned short EOB[2] = {HTAC[0x00][0], HTAC[0x00][1]};
-	const unsigned short M16zeroes[2] = {HTAC[0xF0][0], HTAC[0xF0][1]};
-	int DU[64];
-	int offset, rows_end, row, col, zz, dc_delta, last_nz, pos;
-	// DCT over the eight rows
-	rows_end = du_stride * 8;
-	for (offset = 0; offset < rows_end; offset += du_stride) {
-		float* r = &CDU[offset];
-		stbiw__jpg_DCT(r, r + 1, r + 2, r + 3, r + 4, r + 5, r + 6, r + 7);
-	}
-	// DCT over the eight columns
-	for (offset = 0; offset < 8; ++offset) {
-		float* c = &CDU[offset];
-		stbiw__jpg_DCT(c, c + du_stride, c + du_stride * 2, c + du_stride * 3, c + du_stride * 4, c + du_stride * 5, c + du_stride * 6,
-					   c + du_stride * 7);
-	}
-	// Scale each coefficient, round half away from zero, store in zigzag order
-	zz = 0;
-	for (row = 0; row < 8; ++row) {
-		for (col = 0; col < 8; ++col) {
-			float v = CDU[row * du_stride + col] * fdtbl[zz];
-			DU[stbiw__jpg_ZigZag[zz]] = (int)(v < 0 ? v - 0.5f : v + 0.5f);
-			++zz;
-		}
-	}
-	// DC: code the difference to the previous unit
-	dc_delta = DU[0] - DC;
-	if (dc_delta != 0) {
-		unsigned short bits[2];
-		stbiw__jpg_calcBits(dc_delta, bits);
-		stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTDC[bits[1]]);
-		stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits);
-	} else {
-		stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTDC[0]);
-	}
-	// AC: locate the last non-zero coefficient
-	last_nz = 63;
-	while ((last_nz > 0) && (DU[last_nz] == 0)) {
-		--last_nz;
-	}
-	if (last_nz == 0) {
-		// every AC coefficient is zero
-		stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB);
-		return DU[0];
-	}
-	pos = 1;
-	while (pos <= last_nz) {
-		unsigned short bits[2];
-		int run_start = pos;
-		int zero_run;
-		while (DU[pos] == 0 && pos <= last_nz) {
-			++pos;
-		}
-		zero_run = pos - run_start;
-		if (zero_run >= 16) {
-			// one marker per complete group of 16 zeroes
-			int groups = zero_run >> 4;
-			while (groups-- > 0) {
-				stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes);
-			}
-			zero_run &= 15;
-		}
-		stbiw__jpg_calcBits(DU[pos], bits);
-		stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(zero_run << 4) + bits[1]]);
-		stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits);
-		++pos;
-	}
-	if (last_nz != 63) {
-		stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB);
-	}
-	return DU[0];
-}

-static int stbi_write_jpg_core(stbi__write_context* s, int width, int height, int comp, const void* data, int quality) {

-	// Constants that don't pollute global namespace

-	static const unsigned char std_dc_luminance_nrcodes[] = {0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0};

-	static const unsigned char std_dc_luminance_values[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};

-	static const unsigned char std_ac_luminance_nrcodes[] = {0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d};

-	static const unsigned char std_ac_luminance_values[] = {

-		0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,

-		0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0, 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,

-		0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,

-		0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,

-		0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,

-		0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,

-		0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa};

-	static const unsigned char std_dc_chrominance_nrcodes[] = {0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0};

-	static const unsigned char std_dc_chrominance_values[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};

-	static const unsigned char std_ac_chrominance_nrcodes[] = {0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77};

-	static const unsigned char std_ac_chrominance_values[] = {

-		0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71, 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,

-		0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, 0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34, 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,

-		0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,

-		0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,

-		0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,

-		0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,

-		0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa};

-	// Huffman tables

-	static const unsigned short YDC_HT[256][2] = {{0, 2}, {2, 3}, {3, 3}, {4, 3}, {5, 3}, {6, 3}, {14, 4}, {30, 5}, {62, 6}, {126, 7}, {254, 8}, {510, 9}};

-	static const unsigned short UVDC_HT[256][2] = {{0, 2},  {1, 2},   {2, 2},   {6, 3},   {14, 4},	{30, 5},

-												   {62, 6}, {126, 7}, {254, 8}, {510, 9}, {1022, 10}, {2046, 11}};

-	static const unsigned short YAC_HT[256][2] = {

-		{10, 4},	 {0, 2},	  {1, 2},	  {4, 3},		{11, 4},	 {26, 5},	 {120, 7},	{248, 8},	{1014, 10},  {65410, 16}, {65411, 16},

-		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {12, 4},	 {27, 5},		{121, 7},	{502, 9},	{2038, 11},

-		{65412, 16}, {65413, 16}, {65414, 16}, {65415, 16}, {65416, 16}, {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},

-		{28, 5},	 {249, 8},	{1015, 10},  {4084, 12},  {65417, 16}, {65418, 16}, {65419, 16}, {65420, 16}, {65421, 16}, {65422, 16}, {0, 0},

-		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {58, 6},	 {503, 9},	{4085, 12},  {65423, 16}, {65424, 16}, {65425, 16},

-		{65426, 16}, {65427, 16}, {65428, 16}, {65429, 16}, {0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {59, 6},

-		{1016, 10},  {65430, 16}, {65431, 16}, {65432, 16}, {65433, 16}, {65434, 16}, {65435, 16}, {65436, 16}, {65437, 16}, {0, 0},	  {0, 0},

-		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{122, 7},	{2039, 11},  {65438, 16}, {65439, 16}, {65440, 16}, {65441, 16}, {65442, 16},

-		{65443, 16}, {65444, 16}, {65445, 16}, {0, 0},		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {123, 7},	{4086, 12},

-		{65446, 16}, {65447, 16}, {65448, 16}, {65449, 16}, {65450, 16}, {65451, 16}, {65452, 16}, {65453, 16}, {0, 0},		 {0, 0},	  {0, 0},

-		{0, 0},		 {0, 0},	  {0, 0},	  {250, 8},	{4087, 12},  {65454, 16}, {65455, 16}, {65456, 16}, {65457, 16}, {65458, 16}, {65459, 16},

-		{65460, 16}, {65461, 16}, {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{504, 9},	{32704, 15}, {65462, 16},

-		{65463, 16}, {65464, 16}, {65465, 16}, {65466, 16}, {65467, 16}, {65468, 16}, {65469, 16}, {0, 0},		{0, 0},		 {0, 0},	  {0, 0},

-		{0, 0},		 {0, 0},	  {505, 9},	{65470, 16}, {65471, 16}, {65472, 16}, {65473, 16}, {65474, 16}, {65475, 16}, {65476, 16}, {65477, 16},

-		{65478, 16}, {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},	  {506, 9},	{65479, 16}, {65480, 16}, {65481, 16},

-		{65482, 16}, {65483, 16}, {65484, 16}, {65485, 16}, {65486, 16}, {65487, 16}, {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},

-		{0, 0},		 {1017, 10},  {65488, 16}, {65489, 16}, {65490, 16}, {65491, 16}, {65492, 16}, {65493, 16}, {65494, 16}, {65495, 16}, {65496, 16},

-		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {1018, 10},  {65497, 16}, {65498, 16}, {65499, 16}, {65500, 16},

-		{65501, 16}, {65502, 16}, {65503, 16}, {65504, 16}, {65505, 16}, {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},

-		{2040, 11},  {65506, 16}, {65507, 16}, {65508, 16}, {65509, 16}, {65510, 16}, {65511, 16}, {65512, 16}, {65513, 16}, {65514, 16}, {0, 0},

-		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {65515, 16}, {65516, 16}, {65517, 16}, {65518, 16}, {65519, 16}, {65520, 16},

-		{65521, 16}, {65522, 16}, {65523, 16}, {65524, 16}, {0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {2041, 11},  {65525, 16},

-		{65526, 16}, {65527, 16}, {65528, 16}, {65529, 16}, {65530, 16}, {65531, 16}, {65532, 16}, {65533, 16}, {65534, 16}, {0, 0},	  {0, 0},

-		{0, 0},		 {0, 0},	  {0, 0}};

-	static const unsigned short UVAC_HT[256][2] = {

-		{0, 2},		 {1, 2},	  {4, 3},	  {10, 4},		{24, 5},	 {25, 5},	 {56, 6},	 {120, 7},	{500, 9},	{1014, 10},  {4084, 12},

-		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {11, 4},	 {57, 6},		{246, 8},	{501, 9},	{2038, 11},

-		{4085, 12},  {65416, 16}, {65417, 16}, {65418, 16}, {65419, 16}, {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},

-		{26, 5},	 {247, 8},	{1015, 10},  {4086, 12},  {32706, 15}, {65420, 16}, {65421, 16}, {65422, 16}, {65423, 16}, {65424, 16}, {0, 0},

-		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {27, 5},	 {248, 8},	{1016, 10},  {4087, 12},  {65425, 16}, {65426, 16},

-		{65427, 16}, {65428, 16}, {65429, 16}, {65430, 16}, {0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {58, 6},

-		{502, 9},	{65431, 16}, {65432, 16}, {65433, 16}, {65434, 16}, {65435, 16}, {65436, 16}, {65437, 16}, {65438, 16}, {0, 0},	  {0, 0},

-		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{59, 6},	 {1017, 10},  {65439, 16}, {65440, 16}, {65441, 16}, {65442, 16}, {65443, 16},

-		{65444, 16}, {65445, 16}, {65446, 16}, {0, 0},		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {121, 7},	{2039, 11},

-		{65447, 16}, {65448, 16}, {65449, 16}, {65450, 16}, {65451, 16}, {65452, 16}, {65453, 16}, {65454, 16}, {0, 0},		 {0, 0},	  {0, 0},

-		{0, 0},		 {0, 0},	  {0, 0},	  {122, 7},	{2040, 11},  {65455, 16}, {65456, 16}, {65457, 16}, {65458, 16}, {65459, 16}, {65460, 16},

-		{65461, 16}, {65462, 16}, {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{249, 8},	{65463, 16}, {65464, 16},

-		{65465, 16}, {65466, 16}, {65467, 16}, {65468, 16}, {65469, 16}, {65470, 16}, {65471, 16}, {0, 0},		{0, 0},		 {0, 0},	  {0, 0},

-		{0, 0},		 {0, 0},	  {503, 9},	{65472, 16}, {65473, 16}, {65474, 16}, {65475, 16}, {65476, 16}, {65477, 16}, {65478, 16}, {65479, 16},

-		{65480, 16}, {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},	  {504, 9},	{65481, 16}, {65482, 16}, {65483, 16},

-		{65484, 16}, {65485, 16}, {65486, 16}, {65487, 16}, {65488, 16}, {65489, 16}, {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},

-		{0, 0},		 {505, 9},	{65490, 16}, {65491, 16}, {65492, 16}, {65493, 16}, {65494, 16}, {65495, 16}, {65496, 16}, {65497, 16}, {65498, 16},

-		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {506, 9},	{65499, 16}, {65500, 16}, {65501, 16}, {65502, 16},

-		{65503, 16}, {65504, 16}, {65505, 16}, {65506, 16}, {65507, 16}, {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},

-		{2041, 11},  {65508, 16}, {65509, 16}, {65510, 16}, {65511, 16}, {65512, 16}, {65513, 16}, {65514, 16}, {65515, 16}, {65516, 16}, {0, 0},

-		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {16352, 14}, {65517, 16}, {65518, 16}, {65519, 16}, {65520, 16}, {65521, 16},

-		{65522, 16}, {65523, 16}, {65524, 16}, {65525, 16}, {0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {1018, 10},  {32707, 15},

-		{65526, 16}, {65527, 16}, {65528, 16}, {65529, 16}, {65530, 16}, {65531, 16}, {65532, 16}, {65533, 16}, {65534, 16}, {0, 0},	  {0, 0},

-		{0, 0},		 {0, 0},	  {0, 0}};

-	static const int YQT[] = {16, 11,  10,  16, 24, 40, 51, 61, 12,  12,  14,  19,  26, 58, 60, 55,  14,  13,  16,  24, 40, 57,

-							  69, 56,  14,  17, 22, 29, 51, 87, 80,  62,  18,  22,  37, 56, 68, 109, 103, 77,  24,  35, 55, 64,

-							  81, 104, 113, 92, 49, 64, 78, 87, 103, 121, 120, 101, 72, 92, 95, 98,  112, 100, 103, 99};

-	static const int UVQT[] = {17, 18, 24, 47, 99, 99, 99, 99, 18, 21, 26, 66, 99, 99, 99, 99, 24, 26, 56, 99, 99, 99, 99, 99, 47, 66, 99, 99, 99, 99, 99, 99,

-							   99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99};

-	static const float aasf[] = {1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f,

-								 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f};

-	int row, col, i, k, subsample;

-	float fdtbl_Y[64], fdtbl_UV[64];

-	unsigned char YTable[64], UVTable[64];

-	if (!data || !width || !height || comp > 4 || comp < 1) {

-		return 0;

-	}

-	quality = quality ? quality : 90;

-	subsample = quality <= 90 ? 1 : 0;

-	quality = quality < 1 ? 1 : quality > 100 ? 100 : quality;

-	quality = quality < 50 ? 5000 / quality : 200 - quality * 2;

-	for (i = 0; i < 64; ++i) {

-		int uvti, yti = (YQT[i] * quality + 50) / 100;

-		YTable[stbiw__jpg_ZigZag[i]] = (unsigned char)(yti < 1 ? 1 : yti > 255 ? 255 : yti);

-		uvti = (UVQT[i] * quality + 50) / 100;

-		UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char)(uvti < 1 ? 1 : uvti > 255 ? 255 : uvti);

-	}

-	for (row = 0, k = 0; row < 8; ++row) {

-		for (col = 0; col < 8; ++col, ++k) {

-			fdtbl_Y[k] = 1 / (YTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]);

-			fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]);

-		}

-	}

-	// Write Headers

-	{

-		static const unsigned char head0[] = {0xFF, 0xD8, 0xFF, 0xE0, 0, 0x10, 'J', 'F', 'I', 'F', 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0xFF, 0xDB, 0, 0x84, 0};

-		static const unsigned char head2[] = {0xFF, 0xDA, 0, 0xC, 3, 1, 0, 2, 0x11, 3, 0x11, 0, 0x3F, 0};

-		const unsigned char head1[] = {0xFF,

-									   0xC0,

-									   0,

-									   0x11,

-									   8,

-									   (unsigned char)(height >> 8),

-									   STBIW_UCHAR(height),

-									   (unsigned char)(width >> 8),

-									   STBIW_UCHAR(width),

-									   3,

-									   1,

-									   (unsigned char)(subsample ? 0x22 : 0x11),

-									   0,

-									   2,

-									   0x11,

-									   1,

-									   3,

-									   0x11,

-									   1,

-									   0xFF,

-									   0xC4,

-									   0x01,

-									   0xA2,

-									   0};

-		s->func(s->context, (void*)head0, sizeof(head0));

-		s->func(s->context, (void*)YTable, sizeof(YTable));

-		stbiw__putc(s, 1);

-		s->func(s->context, UVTable, sizeof(UVTable));

-		s->func(s->context, (void*)head1, sizeof(head1));

-		s->func(s->context, (void*)(std_dc_luminance_nrcodes + 1), sizeof(std_dc_luminance_nrcodes) - 1);

-		s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values));

-		stbiw__putc(s, 0x10); // HTYACinfo

-		s->func(s->context, (void*)(std_ac_luminance_nrcodes + 1), sizeof(std_ac_luminance_nrcodes) - 1);

-		s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values));

-		stbiw__putc(s, 1); // HTUDCinfo

-		s->func(s->context, (void*)(std_dc_chrominance_nrcodes + 1), sizeof(std_dc_chrominance_nrcodes) - 1);

-		s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values));

-		stbiw__putc(s, 0x11); // HTUACinfo

-		s->func(s->context, (void*)(std_ac_chrominance_nrcodes + 1), sizeof(std_ac_chrominance_nrcodes) - 1);

-		s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values));

-		s->func(s->context, (void*)head2, sizeof(head2));

-	}

-	// Encode 8x8 macroblocks

-	{

-		static const unsigned short fillBits[] = {0x7F, 7};

-		int DCY = 0, DCU = 0, DCV = 0;

-		int bitBuf = 0, bitCnt = 0;

-		// comp == 2 is grey+alpha (alpha is ignored)

-		int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0;

-		const unsigned char* dataR = (const unsigned char*)data;

-		const unsigned char* dataG = dataR + ofsG;

-		const unsigned char* dataB = dataR + ofsB;

-		int x, y, pos;

-		if (subsample) {

-			for (y = 0; y < height; y += 16) {

-				for (x = 0; x < width; x += 16) {

-					float Y[256], U[256], V[256];

-					for (row = y, pos = 0; row < y + 16; ++row) {

-						// row >= height => use last input row

-						int clamped_row = (row < height) ? row : height - 1;

-						int base_p = (stbi__flip_vertically_on_write ? (height - 1 - clamped_row) : clamped_row) * width * comp;

-						for (col = x; col < x + 16; ++col, ++pos) {

-							// if col >= width => use pixel from last input

-							// column

-							int p = base_p + ((col < width) ? col : (width - 1)) * comp;

-							float r = dataR[p], g = dataG[p], b = dataB[p];

-							Y[pos] = +0.29900f * r + 0.58700f * g + 0.11400f * b - 128;

-							U[pos] = -0.16874f * r - 0.33126f * g + 0.50000f * b;

-							V[pos] = +0.50000f * r - 0.41869f * g - 0.08131f * b;

-						}

-					}

-					DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y + 0, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT);

-					DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y + 8, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT);

-					DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y + 128, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT);

-					DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y + 136, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT);

-					// subsample U,V

-					{

-						float subU[64], subV[64];

-						int yy, xx;

-						for (yy = 0, pos = 0; yy < 8; ++yy) {

-							for (xx = 0; xx < 8; ++xx, ++pos) {

-								int j = yy * 32 + xx * 2;

-								subU[pos] = (U[j + 0] + U[j + 1] + U[j + 16] + U[j + 17]) * 0.25f;

-								subV[pos] = (V[j + 0] + V[j + 1] + V[j + 16] + V[j + 17]) * 0.25f;

-							}

-						}

-						DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, subU, 8, fdtbl_UV, DCU, UVDC_HT, UVAC_HT);

-						DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, subV, 8, fdtbl_UV, DCV, UVDC_HT, UVAC_HT);

-					}

-				}

-			}

-		} else {

-			for (y = 0; y < height; y += 8) {

-				for (x = 0; x < width; x += 8) {

-					float Y[64], U[64], V[64];

-					for (row = y, pos = 0; row < y + 8; ++row) {

-						// row >= height => use last input row

-						int clamped_row = (row < height) ? row : height - 1;

-						int base_p = (stbi__flip_vertically_on_write ? (height - 1 - clamped_row) : clamped_row) * width * comp;

-						for (col = x; col < x + 8; ++col, ++pos) {

-							// if col >= width => use pixel from last input

-							// column

-							int p = base_p + ((col < width) ? col : (width - 1)) * comp;

-							float r = dataR[p], g = dataG[p], b = dataB[p];

-							Y[pos] = +0.29900f * r + 0.58700f * g + 0.11400f * b - 128;

-							U[pos] = -0.16874f * r - 0.33126f * g + 0.50000f * b;

-							V[pos] = +0.50000f * r - 0.41869f * g - 0.08131f * b;

-						}

-					}

-					DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y, 8, fdtbl_Y, DCY, YDC_HT, YAC_HT);

-					DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, U, 8, fdtbl_UV, DCU, UVDC_HT, UVAC_HT);

-					DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, V, 8, fdtbl_UV, DCV, UVDC_HT, UVAC_HT);

-				}

-			}

-		}

-		// Do the bit alignment of the EOI marker

-		stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits);

-	}

-	// EOI

-	stbiw__putc(s, 0xFF);

-	stbiw__putc(s, 0xD9);

-	return 1;

-}

-// Encodes the image as JPEG and hands the bytes to the caller's callback.
-// Returns the result of stbi_write_jpg_core.
-STBIWDEF int stbi_write_jpg_to_func(stbi_write_func* func, void* context, int x, int y, int comp, const void* data, int quality) {
-	stbi__write_context ctx;
-	int ok;
-	stbi__start_write_callbacks(&ctx, func, context);
-	ok = stbi_write_jpg_core(&ctx, x, y, comp, data, quality);
-	return ok;
-}

-#ifndef STBI_WRITE_NO_STDIO

-// Encodes the image as JPEG into the named file.
-// Returns 0 if the file cannot be opened, otherwise the result of stbi_write_jpg_core.
-STBIWDEF int stbi_write_jpg(char const* filename, int x, int y, int comp, const void* data, int quality) {
-	stbi__write_context ctx;
-	int result;
-	if (!stbi__start_write_file(&ctx, filename)) {
-		return 0;
-	}
-	result = stbi_write_jpg_core(&ctx, x, y, comp, data, quality);
-	stbi__end_write_file(&ctx);
-	return result;
-}

-#endif

-#endif // STB_IMAGE_WRITE_IMPLEMENTATION

-/* Revision history

-	  1.14  (2020-02-02) updated JPEG writer to downsample chroma channels

-	  1.13

-	  1.12

-	  1.11  (2019-08-11)

-	  1.10  (2019-02-07)

-			 support utf8 filenames in Windows; fix warnings and platform ifdefs

-	  1.09  (2018-02-11)

-			 fix typo in zlib quality API, improve STB_I_W_STATIC in C++

-	  1.08  (2018-01-29)

-			 add stbi__flip_vertically_on_write, external zlib, zlib quality, choose PNG filter
-	  1.07  (2017-07-24)
-			 doc fix
-	  1.06 (2017-07-23)
-			 writing JPEG (using Jon Olick's code)
-	  1.05   ???
-	  1.04 (2017-03-03)
-			 monochrome BMP expansion
-	  1.03   ???
-	  1.02 (2016-04-02)
-			 avoid allocating large structures on the stack
-	  1.01 (2016-01-16)
-			 STBIW_REALLOC_SIZED: support allocators with no realloc support
-			 avoid race-condition in crc initialization
-			 minor compile issues
-	  1.00 (2015-09-14)
-			 installable file IO function
-	  0.99 (2015-09-13)
-			 warning fixes; TGA rle support

-	  0.98 (2015-04-08)

-			 added STBIW_MALLOC, STBIW_ASSERT etc

-	  0.97 (2015-01-18)

-			 fixed HDR asserts, rewrote HDR rle logic

-	  0.96 (2015-01-17)

-			 add HDR output

-			 fix monochrome BMP

-	  0.95 (2014-08-17)

-					   add monochrome TGA output

-	  0.94 (2014-05-31)

-			 rename private functions to avoid conflicts with stb_image.h

-	  0.93 (2014-05-27)

-			 warning fixes

-	  0.92 (2010-08-01)

-			 casts to unsigned char to fix warnings

-	  0.91 (2010-07-17)

-			 first public release

-	  0.90   first internal release

-*/

-/*

-------------------------------------------------------------------------------

-This software is available under 2 licenses -- choose whichever you prefer.

-------------------------------------------------------------------------------

-ALTERNATIVE A - MIT License

-Copyright (c) 2017 Sean Barrett

-Permission is hereby granted, free of charge, to any person obtaining a copy of

-this software and associated documentation files (the "Software"), to deal in

-the Software without restriction, including without limitation the rights to

-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies

-of the Software, and to permit persons to whom the Software is furnished to do

-so, subject to the following conditions:

-The above copyright notice and this permission notice shall be included in all

-copies or substantial portions of the Software.

-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

-SOFTWARE.

-------------------------------------------------------------------------------

-ALTERNATIVE B - Public Domain (www.unlicense.org)

-This is free and unencumbered software released into the public domain.

-Anyone is free to copy, modify, publish, use, compile, sell, or distribute this

-software, either in source code form or as a compiled binary, for any purpose,

-commercial or non-commercial, and by any means.

-In jurisdictions that recognize copyright laws, the author or authors of this

-software dedicate any and all copyright interest in the software to the public

-domain. We make this dedication for the benefit of the public at large and to

-the detriment of our heirs and successors. We intend this dedication to be an

-overt act of relinquishment in perpetuity of all present and future rights to

-this software under copyright law.

-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

-AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN

-ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION

-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

-------------------------------------------------------------------------------

-*/

--- a/Raw_Demos/stringutil.h

+++ /dev/null

@@ -1,330 +1,0 @@

-#include <string.h>

-#include <stdlib.h>

-#include <stdio.h>

-#include <ctype.h>

-//Before we get on, "stringutil.h" is the most C-ish name for a source code file ever, amirite?

-#ifndef STRUTIL_ALLOC

-#define STRUTIL_ALLOC(s) malloc(s)

-#endif

-#ifndef STRUTIL_FREE

-#define STRUTIL_FREE(s) free(s)

-#endif

-#ifndef STRUTIL_REALLOC

-#define STRUTIL_REALLOC(s, t) realloc(s,t)

-#endif

-#ifndef STRUTIL_NO_SHORT_NAMES

-#define strcata strcatalloc

-#define strcataf1 strcatallocf1

-#define strcataf2 strcatallocf2

-#define strcatafb strcatallocfb

-#endif

-//Concatenate s1 and s2 into a newly allocated string.
-//Neither input is modified or freed. Returns NULL if the allocation fails.
-static inline char* strcatalloc(const char* s1, const char* s2){
-	const size_t len1 = strlen(s1);
-	const size_t len2 = strlen(s2);
-	char* joined = STRUTIL_ALLOC(len1 + len2 + 1);
-	if(!joined) return NULL;
-	memcpy(joined, s1, len1);
-	memcpy(joined + len1, s2, len2 + 1); //+1 copies the terminator too
-	return joined;
-}

-//Append s2 to s1, growing s1 with STRUTIL_REALLOC, and return the (possibly moved) result.
-//s1 must have been obtained from STRUTIL_ALLOC/STRUTIL_REALLOC and is always consumed:
-//on success the caller owns only the returned pointer; on failure (out of memory, or a
-//NULL argument such as the result of an earlier failed call) s1 is freed and NULL is returned.
-static inline char* strcatallocf1(char* s1, const char* s2){
-	size_t len1, len2;
-	char* d;
-	if(!s1) return NULL; //Lets chained calls survive an earlier failure instead of crashing in strlen.
-	if(!s2){
-		STRUTIL_FREE(s1);
-		return NULL;
-	}
-	len1 = strlen(s1);
-	len2 = strlen(s2);
-	d = STRUTIL_REALLOC(s1, len1 + len2 + 1);
-	if(!d){
-		//realloc leaves the old block allocated on failure; release it so it is not leaked.
-		STRUTIL_FREE(s1);
-		return NULL;
-	}
-	memcpy(d + len1, s2, len2 + 1); //+1 copies the terminator too
-	return d;
-}

-//Concatenate s1 and s2 into a newly allocated string, then free s2.
-//s2 is freed even when the allocation fails; s1 is left untouched. Returns NULL on allocation failure.
-static inline char* strcatallocf2(const char* s1, char* s2){
-	const size_t len1 = strlen(s1);
-	const size_t len2 = strlen(s2);
-	char* joined = STRUTIL_ALLOC(len1 + len2 + 1);
-	if(joined){
-		memcpy(joined, s1, len1);
-		memcpy(joined + len1, s2, len2 + 1);
-	}
-	STRUTIL_FREE(s2);
-	return joined;
-}

-//Concatenate s1 and s2 into a newly allocated string, then free both inputs.
-//Both inputs are freed even when the allocation fails. Returns NULL on allocation failure.
-static inline char* strcatallocfb(char* s1, char* s2){
-	const size_t len1 = strlen(s1);
-	const size_t len2 = strlen(s2);
-	char* joined = STRUTIL_ALLOC(len1 + len2 + 1);
-	if(joined){
-		memcpy(joined, s1, len1);
-		memcpy(joined + len1, s2, len2 + 1);
-	}
-	STRUTIL_FREE(s1);
-	STRUTIL_FREE(s2);
-	return joined;
-}

-//Copy the first len bytes of `in` into a newly allocated buffer and append a NUL terminator.
-//Returns NULL if the allocation fails. The buffer comes from STRUTIL_ALLOC, like every other
-//allocating helper in this file, so release it with STRUTIL_FREE.
-static inline char* str_null_terminated_alloc(const char* in, unsigned int len){
-	//Size is computed in size_t: len+1 in unsigned int wraps to 0 when len is UINT_MAX.
-	char* d = STRUTIL_ALLOC((size_t)len + 1);
-	if(d){
-		memcpy(d,in,len);
-		d[len] = '\0';
-	}
-	return d;
-}

-//Returns 1 when str begins with pre (an empty pre always matches), 0 otherwise.
-static inline unsigned int strprefix(const char *pre, const char *str)
-{
-    const size_t prefix_len = strlen(pre);
-    //strncmp stops at str's terminator, so a str shorter than pre compares unequal.
-    if(strncmp(pre, str, prefix_len) != 0)
-        return 0;
-    return 1;
-}

-//Return the index of the first occurrence of subtext inside text, or -1 if it does not occur.
-//An empty subtext is found at index 0.
-//Delegates to strstr: the previous hand-rolled scan reset its match state on a mismatch
-//without re-examining the current character, so it missed matches that begin inside a
-//failed partial match (e.g. "ab" in "aab").
-static inline long long strfind(const char* text, const char* subtext){
-	const char* hit = strstr(text, subtext);
-	if(!hit) return -1;
-	return (long long)(hit - text);
-}

-//Read characters from f into buf until the terminator character, end of file / read error,
-//or until buflen-1 characters have been stored, whichever comes first.
-//The terminator is consumed from the stream but not stored.
-//buf is NUL-terminated directly after the last stored character.
-//Returns the number of characters stored. If buflen is 0 nothing is read or written and 0 is returned.
-static inline unsigned long long read_until_terminator(FILE* f, char* buf, const unsigned long long buflen, char terminator){
-	unsigned long long i = 0;
-	if(buflen == 0) return 0; //buflen-1 would wrap around.
-	while(i < buflen-1){
-		//fgetc's result is kept as int so EOF is not mistaken for (or stored as) a data byte;
-		//feof() only becomes true after a read has already failed.
-		int c = fgetc(f);
-		if(c == EOF) break;
-		if(c == (unsigned char)terminator) break; //fgetc yields the byte as unsigned char.
-		buf[i++] = (char)c;
-	}
-	buf[i] = '\0'; //Terminate at the end of the data, not at the end of the buffer.
-	return i;
-}

-//Read characters from f until the terminator character or end of file / read error, into a
-//buffer obtained from STRUTIL_ALLOC that doubles in size whenever it fills up.
-//The terminator is consumed but not stored; the result is always NUL-terminated.
-//initsize is the starting capacity (values below 2 are raised to 2).
-//On success returns the buffer and, if lenout is not NULL, stores the string length in *lenout.
-//Returns NULL on allocation failure; *lenout is then left untouched.
-static inline char* read_until_terminator_alloced(FILE* f, unsigned long long* lenout, char terminator, unsigned long long initsize){
-	//At least one character plus the terminator; a capacity of 0 would wrap in the bcap-1 check below.
-	unsigned long long bcap = initsize < 2 ? 2 : initsize;
-	unsigned long long blen = 0;
-	char* buf = STRUTIL_ALLOC(bcap);
-	if(!buf) return NULL;
-	for(;;){
-		//int, so that EOF is never confused with, or stored as, a data byte.
-		int c = fgetc(f);
-		if(c == EOF || c == (unsigned char)terminator) break;
-		if(blen == bcap-1){ //Full (one slot is reserved for the NUL): grow the buffer.
-			char* grown = STRUTIL_REALLOC(buf, bcap*2);
-			if(!grown){
-				STRUTIL_FREE(buf); //Same allocator family as the allocation above.
-				return NULL;
-			}
-			buf = grown;
-			bcap *= 2;
-		}
-		buf[blen++] = (char)c;
-	}
-	buf[blen] = '\0';
-	if(lenout) *lenout = blen;
-	return buf;
-}

-//Read the whole of f (from its beginning) into a buffer obtained from STRUTIL_ALLOC.
-//The buffer is NUL-terminated after the data so text files can be used as C strings.
-//On success returns the buffer and stores the number of bytes actually read in *len.
-//Returns NULL if f or len is NULL, if the file size cannot be determined, or if allocation fails.
-static inline void* read_file_into_alloced_buffer(FILE* f, unsigned long long* len){
-	char* buf = NULL;
-	long size;
-	size_t got;
-	if(!f || !len) return NULL;
-	if(fseek(f, 0, SEEK_END) != 0) return NULL;
-	size = ftell(f);
-	if(size < 0) return NULL; //ftell reports failure as -1, which must not become a huge unsigned length.
-	if(fseek(f, 0, SEEK_SET) != 0) return NULL;
-	buf = STRUTIL_ALLOC((size_t)size + 1);
-	if(!buf) return NULL;
-	//fread may deliver fewer bytes than requested; terminate and report what was really read.
-	got = fread(buf, 1, (size_t)size, f);
-	buf[got] = '\0';
-	*len = got;
-	return buf;
-}

-//GEK'S SIMPLE TEXT COMPRESSION SCHEMA

-/*LIMITATIONS

-* Token names must be alphabetic (a-z, A-Z)

-* The token mark must be escaped with a backslash.

-* Token names which are substrings of other ones must be listed later

-*/

-//Encode inbuf by replacing occurrences of token definitions with short token references.
-//tokens holds ntokens pairs: tokens[2*j] is the token name, tokens[2*j+1] its definition.
-//Output layout: esc, tokmark, then for every token: name, decimal length of the definition,
-//tokmark, definition; then a single esc; then the body. In the body a token reference is
-//tokmark, an optional decimal repeat count (only when greater than 1), and the token name;
-//literal esc/tokmark characters are preceded by esc.
-//Returns a newly allocated string (release with STRUTIL_FREE).
-//Uses the full strcatallocf1 name so this function still compiles when
-//STRUTIL_NO_SHORT_NAMES is defined.
-static inline char* strencodealloc(const char* inbuf, const char** tokens, unsigned long long ntokens, char esc, char tokmark){
-	unsigned long long lenin = strlen(inbuf);
-	unsigned long long i = 0;
-	char c_str[512] = {0}; //Scratch buffer for single characters and decimal numbers.
-	char* out = NULL;
-	//The first two characters tell the decoder which marker characters are in use.
-	c_str[0] = esc;
-	c_str[1] = tokmark;
-	out = strcatalloc(c_str, "");
-	c_str[0] = 0;
-	c_str[1] = 0;
-	//Write out all the token entries.
-	for(unsigned long long j = 0; j < ntokens; j++){
-		out = strcatallocf1(out, tokens[2*j]);
-		//Length of the definition, so the decoder knows how many characters to take.
-		snprintf(c_str, sizeof c_str, "%llu", (unsigned long long)strlen(tokens[2*j+1]));
-		out = strcatallocf1(out, c_str);
-		c_str[0] = tokmark;
-		c_str[1] = 0;
-		out = strcatallocf1(out, c_str);
-		out = strcatallocf1(out, tokens[2*j+1]);
-	}
-	//A lone escape character ends the header.
-	c_str[0] = esc;
-	c_str[1] = 0;
-	out = strcatallocf1(out, c_str);
-	//Encode the text.
-	while(i < lenin){
-		int matched = 0;
-		for(unsigned long long t = 0; t < ntokens; t++){
-			const char* def = tokens[t*2+1];
-			unsigned long long curtoklen = strlen(def);
-			unsigned long long howmany = 1;
-			//An empty definition matches everywhere without consuming input; skip it.
-			if(curtoklen == 0 || !strprefix(def, inbuf+i)) continue;
-			//Count back-to-back repetitions of the definition.
-			while(i + howmany*curtoklen < lenin && strprefix(def, inbuf+i+howmany*curtoklen))
-				howmany++;
-			c_str[0] = tokmark;
-			c_str[1] = 0;
-			out = strcatallocf1(out, c_str);
-			if(howmany > 1){
-				snprintf(c_str, sizeof c_str, "%llu", howmany);
-				out = strcatallocf1(out, c_str);
-			}
-			out = strcatallocf1(out, tokens[t*2]);
-			i += howmany*curtoklen;
-			matched = 1;
-			break;
-		}
-		//After a match, rescan every token at the new position instead of copying the next character literally.
-		if(matched) continue;
-		//Marker characters in the text must be escaped.
-		if(inbuf[i] == esc || inbuf[i] == tokmark){
-			c_str[0] = esc;
-			c_str[1] = 0;
-			out = strcatallocf1(out, c_str);
-		}
-		//No token matched here: copy the character as-is.
-		c_str[0] = inbuf[i];
-		c_str[1] = 0;
-		out = strcatallocf1(out, c_str);
-		i++;
-	}
-	return out;
-}

-//Decode a string in the layout written by strencodealloc:
-//inbuf[0] is the escape character, inbuf[1] the token mark, followed by the token table
-//(name made of letters, decimal definition length, one separator character, definition),
-//a single escape character, and the body.
-//In the body the escape character makes the next character literal, and the token mark
-//followed by an optional decimal count and a token name expands to that many copies
-//(one if no count is given) of the token's definition.
-//Returns a newly allocated string. If the header is malformed or truncated, the text decoded
-//so far (possibly empty) is returned. Returns NULL if inbuf is shorter than 3 characters or
-//memory runs out.
-//Fetch the next input character into c, or leave through `end` when the input is exhausted.
-#define STRDECODE_NEXT() do{ if(i <= lenin) c = inbuf[i++]; else goto end; }while(0)
-static inline char* strdecodealloc(char* inbuf){
-	unsigned long long lenin = strlen(inbuf);
-	unsigned long long i = 2;
-	unsigned long long ntokens = 0;
-	char esc, tokmark, c;
-	char c_str[2] = {0,0};
-	char* out = NULL;      //Our decoded text.
-	char** tokens = NULL;  //Even entries are token names, odd entries their definitions.
-	int doescape = 0;
-	if(lenin < 3) return NULL;
-	esc = inbuf[0];
-	tokmark = inbuf[1];
-	out = strcatalloc("","");
-	if(!out) return NULL;
-	//Retrieve the tokens.
-	STRDECODE_NEXT(); //has to occur before the loop.
-	while(c != esc){
-		unsigned long long l = 0;
-		unsigned long long slot = ntokens*2;
-		char** grown = STRUTIL_REALLOC(tokens, (ntokens+1) * 2 * sizeof(char*));
-		if(!grown) goto oom; //tokens still holds the previous, fully initialised table.
-		tokens = grown;
-		tokens[slot] = strcatalloc("","");
-		tokens[slot+1] = strcatalloc("","");
-		ntokens++; //Both slots are initialised (possibly NULL), so cleanup may now visit them.
-		if(!tokens[slot] || !tokens[slot+1]) goto oom;
-		//Get the name of the token. <ctype.h> functions need an unsigned char value.
-		if(!isalpha((unsigned char)c)) goto end;
-		while(isalpha((unsigned char)c)){
-			c_str[0] = c;
-			tokens[slot] = strcatallocf1(tokens[slot], c_str);
-			if(!tokens[slot]) goto oom;
-			STRDECODE_NEXT();
-		}
-		//c is now the first digit of the definition length.
-		if(!isdigit((unsigned char)c)) goto end;
-		while(isdigit((unsigned char)c) && c!=tokmark){
-			l = l*10 + (unsigned long long)(c - '0');
-			STRDECODE_NEXT();
-		}
-		//c is now the separator; the next l characters are the definition.
-		for(unsigned long long vv = 0; vv < l; vv++){
-			STRDECODE_NEXT();
-			c_str[0] = c;
-			tokens[slot+1] = strcatallocf1(tokens[slot+1], c_str);
-			if(!tokens[slot+1]) goto oom;
-		}
-		STRDECODE_NEXT();
-	}
-	//Header finished; now build the decoded text.
-	STRDECODE_NEXT();
-	while(i<=lenin){
-		if(!doescape && c==esc){
-			doescape = 1;
-			STRDECODE_NEXT();
-			continue;
-		}
-		if(!doescape && c==tokmark){
-			//Handle digits prefixing a token.
-			unsigned long long l = 0;
-			STRDECODE_NEXT();
-			if(isdigit((unsigned char)c)){
-				while(isdigit((unsigned char)c)){
-					l = l*10 + (unsigned long long)(c - '0');
-					STRDECODE_NEXT();
-				}
-			} else {
-				l = 1;
-			}
-			i--; //Step back onto c, the first character of the token name.
-			for(unsigned long long t = 0; t < ntokens; t++){
-				if(strprefix(tokens[t*2], inbuf+i)){
-					for(unsigned long long q = 0; q < l; q++){
-						out = strcatallocf1(out, tokens[t*2+1]);
-						if(!out) goto oom;
-					}
-					i += strlen(tokens[t*2]);
-					break;
-				}
-			}
-			if(i<=lenin) c = inbuf[i++];
-			continue;
-		}
-		//Ordinary (or escaped) character: copy it through.
-		c_str[0] = c;
-		out = strcatallocf1(out, c_str);
-		if(!out) goto oom;
-		doescape = 0;
-		STRDECODE_NEXT();
-	}
-	goto end;
-	oom:
-	//Out of memory: discard the partial result and report failure.
-	STRUTIL_FREE(out);
-	out = NULL;
-	end:
-	if(tokens){
-		for(unsigned long long j = 0; j < ntokens; j++){
-			STRUTIL_FREE(tokens[j*2]);
-			STRUTIL_FREE(tokens[j*2+1]);
-		}
-		STRUTIL_FREE(tokens);
-	}
-	return out;
-}
-#undef STRDECODE_NEXT

--- a/Raw_Demos/t2i.c

+++ b/Raw_Demos/t2i.c

@@ -24,7 +24,7 @@

 #define CHAD_MATH_IMPL

 //Drags in Math and String (which are already dragged in above.)

-#include "include/3dMath.h"

+#include "../include-demo/3dMath.h"

 //Requires

/*

@@ -39,8 +39,8 @@

*/

 #define STBIW_ASSERT(x) /* a comment */

 #define STB_IMAGE_WRITE_IMPLEMENTATION

-#include "include/stb_image_write.h"

-#include "stringutil.h"

+#include "../include-demo/stb_image_write.h"

+#include "../include-demo/stringutil.h"

 typedef unsigned char uchar;

 int tsize = 1;

--- a/SDL_Examples/gears.c

+++ b/SDL_Examples/gears.c

@@ -17,9 +17,9 @@

 #include "../include/zbuffer.h"

 #define CHAD_API_IMPL

 #define CHAD_MATH_IMPL

-#include "include/3dMath.h"

+#include "../include-demo/3dMath.h"

 #ifdef PLAY_MUSIC

-#include "include/api_audio.h"

+#include "../include-demo/api_audio.h"

 #else

 typedef unsigned char uchar;

 #endif

--- a/SDL_Examples/helloworld.c

+++ b/SDL_Examples/helloworld.c

@@ -18,9 +18,9 @@

 #include "../include/zbuffer.h"

 #define CHAD_API_IMPL

 #define CHAD_MATH_IMPL

-#include "include/3dMath.h"

+#include "../include-demo/3dMath.h"

 #ifdef PLAY_MUSIC

-#include "include/api_audio.h"

+#include "../include-demo/api_audio.h"

 #else

 typedef unsigned char uchar;

 #endif

--- a/SDL_Examples/include/3dMath.h

+++ /dev/null

@@ -1,554 +1,0 @@

-/* Public Domain / CC0 C99 Vector Math Library

-*/

-#ifndef CHAD_MATH_H

-#define CHAD_MATH_H

-//#define CHAD_MATH_NO_ALIGN

-#ifndef CHAD_MATH_NO_ALIGN

-#include <stdalign.h>

-#define CHAD_ALIGN alignas(16)

-#else

-#define CHAD_ALIGN /*a comment*/

-#endif

-#include <math.h>

-#include <string.h>

-typedef float f_;

-typedef unsigned int uint;

-#define MAX(x,y) (x>y?x:y)

-#define MIN(x,y) (x<y?x:y)

-typedef struct {CHAD_ALIGN f_ d[3];} vec3;

-typedef struct {CHAD_ALIGN int d[3];} ivec3;

-typedef struct {CHAD_ALIGN f_ d[4];} vec4;

-typedef struct {CHAD_ALIGN f_ d[16];} mat4;

-//Collision detection

-//These Algorithms return the penetration vector into

-//the shape in the first argument

-//With depth of penetration in element 4

-//if depth of penetration is zero or lower then there is no penetration.

-typedef struct{

-	vec4 c;

-	vec3 e;

-}aabb;

-typedef aabb colshape; //c.d[3] determines if it's a sphere or box. 0 or less = box, greater than 0 = sphere

-static inline vec4 getrow( mat4 a,  uint index){

-	return (vec4){

-		.d[0]=a.d[0*4+index],

-		.d[1]=a.d[1*4+index],

-		.d[2]=a.d[2*4+index],

-		.d[3]=a.d[3*4+index]

-	};

-}

-static inline mat4 swapRowColumnMajor( mat4 in){

-	mat4 result;

-	vec4 t;

-	int i = 0;

-	t = getrow(in,i);

-	memcpy(result.d+i*4, t.d, 4*4);i++;

-	t = getrow(in,i);

-	memcpy(result.d+i*4, t.d, 4*4);i++;

-	t = getrow(in,i);

-	memcpy(result.d+i*4, t.d, 4*4);i++;

-	t = getrow(in,i);

-	memcpy(result.d+i*4, t.d, 4*4);

-	return result;

-}

-static inline vec4 getcol( mat4 a,  uint index){

-	return (vec4){

-		.d[0]=a.d[index*4+0],

-		.d[1]=a.d[index*4+1],

-		.d[2]=a.d[index*4+2],

-		.d[3]=a.d[index*4+3]

-	};

-}

-static inline mat4 scalemat4( vec4 s){

-	mat4 ret;

-	for(int i = 1; i < 16; i++)

-		ret.d[i]= 0.0;

-	ret.d[0*4 + 0] = s.d[0]; //x scale

-	ret.d[1*4 + 1] = s.d[1]; //y scale

-	ret.d[2*4 + 2] = s.d[2]; //z scale

-	ret.d[3*4 + 3] = s.d[3]; //w scale

-	return ret;

-}

-static inline int invmat4( mat4 m, mat4* invOut) //returns 1 if successful

-{

-    mat4 inv;

-    f_ det;

-    int i;

-    inv.d[0] = m.d[5]  * m.d[10] * m.d[15] -

-             m.d[5]  * m.d[11] * m.d[14] -

-             m.d[9]  * m.d[6]  * m.d[15] +

-             m.d[9]  * m.d[7]  * m.d[14] +

-             m.d[13] * m.d[6]  * m.d[11] -

-             m.d[13] * m.d[7]  * m.d[10];

-    inv.d[4] = -m.d[4]  * m.d[10] * m.d[15] +

-              m.d[4]  * m.d[11] * m.d[14] +

-              m.d[8]  * m.d[6]  * m.d[15] -

-              m.d[8]  * m.d[7]  * m.d[14] -

-              m.d[12] * m.d[6]  * m.d[11] +

-              m.d[12] * m.d[7]  * m.d[10];

-    inv.d[8] = m.d[4]  * m.d[9] * m.d[15] -

-             m.d[4]  * m.d[11] * m.d[13] -

-             m.d[8]  * m.d[5] * m.d[15] +

-             m.d[8]  * m.d[7] * m.d[13] +

-             m.d[12] * m.d[5] * m.d[11] -

-             m.d[12] * m.d[7] * m.d[9];

-    inv.d[12] = -m.d[4]  * m.d[9] * m.d[14] +

-               m.d[4]  * m.d[10] * m.d[13] +

-               m.d[8]  * m.d[5] * m.d[14] -

-               m.d[8]  * m.d[6] * m.d[13] -

-               m.d[12] * m.d[5] * m.d[10] +

-               m.d[12] * m.d[6] * m.d[9];

-    inv.d[1] = -m.d[1]  * m.d[10] * m.d[15] +

-              m.d[1]  * m.d[11] * m.d[14] +

-              m.d[9]  * m.d[2] * m.d[15] -

-              m.d[9]  * m.d[3] * m.d[14] -

-              m.d[13] * m.d[2] * m.d[11] +

-              m.d[13] * m.d[3] * m.d[10];

-    inv.d[5] = m.d[0]  * m.d[10] * m.d[15] -

-             m.d[0]  * m.d[11] * m.d[14] -

-             m.d[8]  * m.d[2] * m.d[15] +

-             m.d[8]  * m.d[3] * m.d[14] +

-             m.d[12] * m.d[2] * m.d[11] -

-             m.d[12] * m.d[3] * m.d[10];

-    inv.d[9] = -m.d[0]  * m.d[9] * m.d[15] +

-              m.d[0]  * m.d[11] * m.d[13] +

-              m.d[8]  * m.d[1] * m.d[15] -

-              m.d[8]  * m.d[3] * m.d[13] -

-              m.d[12] * m.d[1] * m.d[11] +

-              m.d[12] * m.d[3] * m.d[9];

-    inv.d[13] = m.d[0]  * m.d[9] * m.d[14] -

-              m.d[0]  * m.d[10] * m.d[13] -

-              m.d[8]  * m.d[1] * m.d[14] +

-              m.d[8]  * m.d[2] * m.d[13] +

-              m.d[12] * m.d[1] * m.d[10] -

-              m.d[12] * m.d[2] * m.d[9];

-    inv.d[2] = m.d[1]  * m.d[6] * m.d[15] -

-             m.d[1]  * m.d[7] * m.d[14] -

-             m.d[5]  * m.d[2] * m.d[15] +

-             m.d[5]  * m.d[3] * m.d[14] +

-             m.d[13] * m.d[2] * m.d[7] -

-             m.d[13] * m.d[3] * m.d[6];

-    inv.d[6] = -m.d[0]  * m.d[6] * m.d[15] +

-              m.d[0]  * m.d[7] * m.d[14] +

-              m.d[4]  * m.d[2] * m.d[15] -

-              m.d[4]  * m.d[3] * m.d[14] -

-              m.d[12] * m.d[2] * m.d[7] +

-              m.d[12] * m.d[3] * m.d[6];

-    inv.d[10] = m.d[0]  * m.d[5] * m.d[15] -

-              m.d[0]  * m.d[7] * m.d[13] -

-              m.d[4]  * m.d[1] * m.d[15] +

-              m.d[4]  * m.d[3] * m.d[13] +

-              m.d[12] * m.d[1] * m.d[7] -

-              m.d[12] * m.d[3] * m.d[5];

-    inv.d[14] = -m.d[0]  * m.d[5] * m.d[14] +

-               m.d[0]  * m.d[6] * m.d[13] +

-               m.d[4]  * m.d[1] * m.d[14] -

-               m.d[4]  * m.d[2] * m.d[13] -

-               m.d[12] * m.d[1] * m.d[6] +

-               m.d[12] * m.d[2] * m.d[5];

-    inv.d[3] = -m.d[1] * m.d[6] * m.d[11] +

-              m.d[1] * m.d[7] * m.d[10] +

-              m.d[5] * m.d[2] * m.d[11] -

-              m.d[5] * m.d[3] * m.d[10] -

-              m.d[9] * m.d[2] * m.d[7] +

-              m.d[9] * m.d[3] * m.d[6];

-    inv.d[7] = m.d[0] * m.d[6] * m.d[11] -

-             m.d[0] * m.d[7] * m.d[10] -

-             m.d[4] * m.d[2] * m.d[11] +

-             m.d[4] * m.d[3] * m.d[10] +

-             m.d[8] * m.d[2] * m.d[7] -

-             m.d[8] * m.d[3] * m.d[6];

-    inv.d[11] = -m.d[0] * m.d[5] * m.d[11] +

-               m.d[0] * m.d[7] * m.d[9] +

-               m.d[4] * m.d[1] * m.d[11] -

-               m.d[4] * m.d[3] * m.d[9] -

-               m.d[8] * m.d[1] * m.d[7] +

-               m.d[8] * m.d[3] * m.d[5];

-    inv.d[15] = m.d[0] * m.d[5] * m.d[10] -

-              m.d[0] * m.d[6] * m.d[9] -

-              m.d[4] * m.d[1] * m.d[10] +

-              m.d[4] * m.d[2] * m.d[9] +

-              m.d[8] * m.d[1] * m.d[6] -

-              m.d[8] * m.d[2] * m.d[5];

-    det = m.d[0] * inv.d[0] + m.d[1] * inv.d[4] + m.d[2] * inv.d[8] + m.d[3] * inv.d[12];

-    if (det == 0)

-        return 0;

-    det = 1.0 / det;

-    for (i = 0; i < 16; i++)

-        invOut->d[i] = inv.d[i] * det;

-    return 1;

-}

-static inline mat4 perspective( f_ fov,  f_ aspect,  f_ near,  f_ far){

-	mat4 ret;

-	f_ D2R = 3.14159265358979323 / 180.0;

-	f_ yScale = 1.0/tanf(D2R * fov/2);

-	f_ xScale = yScale/aspect;

-	f_ nearmfar = near-far;

-	ret.d[0*4+0] = xScale; 	ret.d[0*4+1]=0; 	ret.d[0*4+2]=0;					ret.d[0*4+3]=0;

-	ret.d[1*4+0]=0; 		ret.d[1*4+1]=yScale;ret.d[1*4+2]=0;					ret.d[1*4+3]=0;

-	ret.d[2*4+0]=0; 		ret.d[2*4+1]=0;		ret.d[2*4+2]=(far+near)/nearmfar;ret.d[2*4+3]=-1;

-	ret.d[3*4+0]=0; 		ret.d[3*4+1]=0;		ret.d[3*4+2]=2*far*near/nearmfar;ret.d[3*4+3]=0;

-	/*

-	ret.d[0*4+0] = xScale; 	ret.d[0*4+1]=0; 	ret.d[0*4+2]=0;						ret.d[0*4+3]=0;

-	ret.d[1*4+0]=0; 		ret.d[1*4+1]=yScale;ret.d[1*4+2]=0;						ret.d[1*4+3]=0;

-	ret.d[2*4+0]=0; 		ret.d[2*4+1]=0;		ret.d[2*4+2]=(far+near)/nearmfar;	ret.d[2*4+3]=2*far*near/nearmfar;

-	ret.d[3*4+0]=0; 		ret.d[3*4+1]=0;		ret.d[3*4+2]=-1;					ret.d[3*4+3]=0;

-	*/

-	return ret;

-}

-static inline vec3 viewport( uint xdim,  uint ydim,  vec3 input){

-	input.d[0] += 1;

-	input.d[1] += 1;

-	input.d[0] *= (f_)xdim / 2.0;

-	input.d[1] *= (f_)ydim / 2.0;

-	input.d[2] = (input.d[2])/2.0;

-	return input;

-}

-static inline mat4 rotate( vec3 rotation){

-	f_ a = rotation.d[0];

-	f_ b = rotation.d[1];

-	f_ c = rotation.d[2];

-	mat4 rm;

-	rm.d[0*4 + 0] = cosf(a)*cosf(b);

-	rm.d[1*4 + 0] = sinf(a)*cosf(b);

-	rm.d[2*4 + 0] = -sinf(b);

-	rm.d[0*4 + 1] = cosf(a)*sinf(b)*sinf(c)-sinf(a)*cosf(c);

-	rm.d[1*4 + 1] = sinf(a)*sinf(b)*sinf(c)+cosf(a)*cosf(c);

-	rm.d[2*4 + 1] = cosf(b)*sinf(c);

-	rm.d[0*4 + 2] = cosf(a)*sinf(b)*cosf(c)+sinf(a)*sinf(c);

-	rm.d[1*4 + 2] = sinf(a)*sinf(b)*cosf(c)-cosf(a)*sinf(c);

-	rm.d[2*4 + 2] = cosf(b)*cosf(c);

-	//the other parts

-	rm.d[0*4 + 3] = 0;

-	rm.d[1*4 + 3] = 0;

-	rm.d[2*4 + 3] = 0;

-	rm.d[3*4 + 3] = 1; //the bottom right corner of the matrix.

-	rm.d[3*4 + 0] = 0;

-	rm.d[3*4 + 1] = 0;

-	rm.d[3*4 + 2] = 0;

-	return rm;

-}

-static inline f_ clampf( f_ a,  f_ min,  f_ max){

-	if(a<min) return min;

-	if(a>max) return max;

-	return a;

-}

-static inline f_ lengthv3( vec3 a){

-	return sqrtf(a.d[0] * a.d[0] + a.d[1] * a.d[1] + a.d[2] * a.d[2]);

-}

-static inline f_ lengthv4( vec4 a){

-	return sqrtf(a.d[0] * a.d[0] + a.d[1] * a.d[1] + a.d[2] * a.d[2] + a.d[3] * a.d[3]);

-}

-static inline vec3 multvec3( vec3 a,  vec3 b){

-	return (vec3){

-		.d[0]=a.d[0]*b.d[0],

-		.d[1]=a.d[1]*b.d[1],

-		.d[2]=a.d[2]*b.d[2]

-	};

-}

-static inline vec4 multvec4( vec4 a,  vec4 b){

-	return (vec4){

-		.d[0]=a.d[0]*b.d[0],

-		.d[1]=a.d[1]*b.d[1],

-		.d[2]=a.d[2]*b.d[2],

-		.d[3]=a.d[3]*b.d[3]

-	};

-}

-static inline vec3 clampvec3( vec3 a,  vec3 min,  vec3 max){

-	vec3 ret;

-	ret.d[0] = clampf(a.d[0],min.d[0],max.d[0]);

-	ret.d[1] = clampf(a.d[1],min.d[1],max.d[1]);

-	ret.d[2] = clampf(a.d[2],min.d[2],max.d[2]);

-	return ret;

-}

-static inline vec4 clampvec4( vec4 a,  vec4 min,  vec4 max){

-	vec4 ret;

-	ret.d[0] = clampf(a.d[0],min.d[0],max.d[0]);

-	ret.d[1] = clampf(a.d[1],min.d[1],max.d[1]);

-	ret.d[2] = clampf(a.d[2],min.d[2],max.d[2]);

-	ret.d[3] = clampf(a.d[3],min.d[3],max.d[3]);

-	return ret;

-}

-static inline f_ dotv3( vec3 a,  vec3 b){

-	return a.d[0] * b.d[0] + a.d[1] * b.d[1] + a.d[2] * b.d[2];

-}

-static inline f_ dotv4( vec4 a,  vec4 b){

-	return a.d[0] * b.d[0] + a.d[1] * b.d[1] + a.d[2] * b.d[2] + a.d[3] * b.d[3];

-}

-static inline mat4 multm4( mat4 a,  mat4 b){

-	mat4 ret;

-	for(int i = 0; i < 4; i++)

-	for(int j = 0; j < 4; j++)

-		ret.d[i*4 + j] = dotv4(

-			getrow(a, j),

-			getcol(b, i)

-		);

-	return ret;

-}

-static inline vec4 mat4xvec4( mat4 t,  vec4 v){

-	uint i = 0;

-	vec4 vr;

-	vr.d[0] = 	t.d[0*4+i] * v.d[0] +

-				t.d[1*4+i] * v.d[1] +

-				t.d[2*4+i] * v.d[2] +

-				t.d[3*4+i] * v.d[3];

-	i++;

-	vr.d[1] = 	t.d[0*4+i] * v.d[0] +

-				t.d[1*4+i] * v.d[1] +

-				t.d[2*4+i] * v.d[2] +

-				t.d[3*4+i] * v.d[3];

-	i++;

-	vr.d[2] = 	t.d[0*4+i] * v.d[0] +

-				t.d[1*4+i] * v.d[1] +

-				t.d[2*4+i] * v.d[2] +

-				t.d[3*4+i] * v.d[3];

-	i++;

-	vr.d[3] = 	t.d[0*4+i] * v.d[0] +

-				t.d[1*4+i] * v.d[1] +

-				t.d[2*4+i] * v.d[2] +

-				t.d[3*4+i] * v.d[3];

-	return vr;

-}

-static inline vec3 crossv3( vec3 a,  vec3 b){

-	vec3 retval;

-	retval.d[0] = a.d[1] * b.d[2] - a.d[2] * b.d[1];

-	retval.d[1] = a.d[2] * b.d[0] - a.d[0] * b.d[2];

-	retval.d[2] = a.d[0] * b.d[1] - a.d[1] * b.d[0];

-	return retval;

-}

-static inline vec3 scalev3( f_ s,  vec3 i){i.d[0] *= s; i.d[1] *= s; i.d[2] *= s; return i;}

-static inline vec4 scalev4( f_ s,  vec4 i){i.d[0] *= s; i.d[1] *= s; i.d[2] *= s;i.d[3] *= s; return i;}

-static inline vec3 normalizev3( vec3 a){

-  	if(lengthv3(a)==0) return (vec3){.d[0]=0.0,.d[1]=0.0,.d[2]=1.0};

-	return scalev3(1.0/lengthv3(a), a);

-}

-static inline vec4 normalizev4( vec4 a){

-  	if(lengthv4(a)==0) return (vec4){.d[0]=0.0,.d[1]=0.0,.d[2]=1.0,.d[3]=0.0};

-	return scalev4(1.0/lengthv4(a), a);

-}

-static inline vec3 addv3( vec3 aa,  vec3 b){

-	vec3 a = aa;

-	a.d[0] += b.d[0]; a.d[1] += b.d[1]; a.d[2] += b.d[2]; return a;

-}

-static inline vec3 rotatev3( vec3 in,  vec3 axis,  f_ ang){

-	vec3 t1 = scalev3(cosf(ang),in);

-	vec3 t2 = scalev3(sinf(ang),crossv3(axis,in));

-	vec3 t3 = scalev3((1-cosf(ang))*dotv3(axis,in),axis);

-	return addv3(t1,addv3(t2,t3));

-}

-static inline vec4 addv4( vec4 aa,  vec4 b){

-	vec4 a = aa;

-	a.d[0] += b.d[0]; a.d[1] += b.d[1]; a.d[2] += b.d[2]; a.d[3] += b.d[3]; return a;

-}

-static inline vec3 subv3( vec3 a,  vec3 b){

-	return addv3(a,scalev3(-1,b));

-}

-static inline mat4 identitymat4(){

-	return scalemat4(

-		(vec4){.d[0]=1.0,.d[1]=1.0,.d[2]=1.0,.d[3]=1.0}

-	);

-}

-static inline mat4 translate( vec3 t){

-	mat4 tm = identitymat4();

-	tm.d[3*4+0] = t.d[0];

-	tm.d[3*4+1] = t.d[1];

-	tm.d[3*4+2] = t.d[2];

-	return tm;

-}

-static inline vec4 subv4( vec4 a,  vec4 b){

-	return addv4(a,scalev4(-1,b));

-}

-static inline vec3 reflect( vec3 in,  vec3 norm){

-	return

-	addv3(in, //I +

-		scalev3(-2.0*dotv3(norm, in), //-2.0 * dotv3(norm,in) *

-			norm //N

-		)

-	);

-}

-static inline vec4 upv3( vec3 in,  f_ w){

-	return (vec4){

-		.d[0]=in.d[0],

-		.d[1]=in.d[1],

-		.d[2]=in.d[2],

-		.d[3]=w

-	};

-}

-static inline vec3 downv4( vec4 in){

-	return (vec3){

-		.d[0]=in.d[0],

-		.d[1]=in.d[1],

-		.d[2]=in.d[2]

-	};

-}

-static inline mat4 lookAt( vec3 eye,  vec3 at,  vec3 up){

-	mat4 cw = identitymat4();

-	vec3 zaxis = normalizev3(subv3(at,eye));

-	vec3 xaxis = normalizev3(crossv3(zaxis,up));

-	vec3 yaxis = crossv3(xaxis, zaxis);

-	zaxis = scalev3(-1,zaxis);

-	cw.d[0*4+0] = xaxis.d[0];

-	cw.d[1*4+0] = xaxis.d[1];

-	cw.d[2*4+0] = xaxis.d[2];

-	cw.d[3*4+0] = -dotv3(xaxis,eye);

-	cw.d[0*4+1] = yaxis.d[0];

-	cw.d[1*4+1] = yaxis.d[1];

-	cw.d[2*4+1] = yaxis.d[2];

-	cw.d[3*4+1] = -dotv3(yaxis,eye);

-	cw.d[0*4+2] = zaxis.d[0];

-	cw.d[1*4+2] = zaxis.d[1];

-	cw.d[2*4+2] = zaxis.d[2];

-	cw.d[3*4+2] = -dotv3(zaxis,eye);

-	cw.d[0*4+3] = 0;

-	cw.d[1*4+3] = 0;

-	cw.d[2*4+3] = 0;

-	cw.d[3*4+3] = 1;

-	return cw;

-}

-//Collision detection

-//These Algorithms return the penetration vector into

-//the shape in the first argument

-//With depth of penetration in element 4

-//if depth of penetration is zero or lower then there is no penetration.

-static inline vec4 spherevsphere( vec4 s1,  vec4 s2){ //x,y,z,radius

-	vec4 ret;

-	vec3 diff = subv3(

-				downv4(s2),

-				downv4(s1)

-			);

-	float lv3 = lengthv3(diff);

-	float l = (s1.d[3] + s2.d[3]-lv3);

-	if(l < 0 || lv3 == 0) {

-		ret.d[3] = 0;return ret;

-	}

-	ret = upv3(

-		scalev3(

-			l/lv3,diff

-		)

-		,l

-	);

-	return ret;

-}

-static inline vec4 boxvbox( aabb b1,  aabb b2){ //Just points along the minimum separating axis, Nothing fancy.

-	vec4 ret = (vec4){

-		.d[0]=0,

-		.d[1]=0,

-		.d[2]=0,

-		.d[3]=0

-	};

-	vec3 sumextents = addv3(b1.e,b2.e);

-	vec3 b1c = downv4(b1.c);

-	vec3 b2c = downv4(b2.c);

-	vec3 b1min = subv3(b1c,b1.e);

-	vec3 b2min = subv3(b2c,b2.e);

-	vec3 b1max = addv3(b1c,b1.e);

-	vec3 b2max = addv3(b2c,b2.e);

-	if(

-		!(

-			(fabs(b1c.d[0] - b2c.d[0]) <= sumextents.d[0]) &&

-			(fabs(b1c.d[1] - b2c.d[1]) <= sumextents.d[1]) &&

-			(fabs(b1c.d[2] - b2c.d[2]) <= sumextents.d[2])

-		)

-	){

-		return ret;

-	}

-	vec3 axispen[2];

-	axispen[0] = subv3(b1max,b2min);

-	axispen[1] = subv3(b1min,b2max);

-	ret.d[3] = axispen[0].d[0];

-	ret.d[0] = axispen[0].d[0];

-	for(int i = 1; i < 6; i++){

-		if(fabs(axispen[i/3].d[i%3]) < fabs(ret.d[3])){

-			ret = (vec4){

-						.d[0]=0,

-						.d[1]=0,

-						.d[2]=0,

-						.d[3]=(axispen[i/3].d[i%3])

-					};

-			ret.d[i%3] = ret.d[3];

-			ret.d[3] = fabs(ret.d[3]);

-		}

-	}

-	return ret;

-}

-static inline vec3 closestpointAABB( aabb b,  vec3 p){

-	vec3 b1min = subv3(downv4(b.c),b.e);

-	vec3 b1max = addv3(downv4(b.c),b.e);

-	return clampvec3(p,b1min,b1max);

-}

-static inline vec4 spherevaabb( vec4 sph,  aabb box){

-	vec4 ret;

-	vec3 p = closestpointAABB(box,downv4(sph));

-	vec3 v = subv3(p,downv4(sph));

-	f_ d2 = dotv3(v,v);

-	if(d2 <= sph.d[3] * sph.d[3]){

-		f_ len = lengthv3(v);

-		f_ diff = (sph.d[3] - len);

-		if(len > 0){

-			f_ factor = diff/len;

-			vec3 bruh = scalev3(factor, v);

-			ret = upv3(bruh, diff);

-			return ret;

-		} else {

-			aabb virt;

-			virt.c = sph;

-			virt.e.d[0] = sph.d[3];

-			virt.e.d[1] = sph.d[3];

-			virt.e.d[2] = sph.d[3];

-			return boxvbox(virt,box);

-		}

-	}

-	else

-		return (vec4){

-			.d[0]=0,

-			.d[1]=0,

-			.d[2]=0,

-			.d[3]=0

-		};

-}

-//end of chad math impl

-//END Math_Library.h~~~~~~~~~~~~~~~~~~~~

-#endif

--- a/SDL_Examples/include/api_audio.h

+++ /dev/null

@@ -1,97 +1,0 @@

-/* Public Domain / CC0 Audio Playback Mini Library

-Written by Gek (DMHSW) in 2020

-*/

-/*

-HOW TO BUILD THINGS USING THIS LIBRARY

-#define CHAD_API_IMPL

-//^ This line goes in the file you want the "implementation" in.

-#include "api_audio.h"

-*/

-#define USE_MIXER

-#define USE_MP3

-//#ifdef __TINYC__

-//#define STBI_NO_SIMD

-//#define SDL_DISABLE_IMMINTRIN_H

-//#endif

-#include <stdio.h>

-#include <stdlib.h>

-#include <math.h>

-#define SDL_MAIN_HANDLED

-#include <SDL/SDL.h>

-//NOTE: you might need to change these depending on your project structure.

-#ifdef CHAD_API_IMPL

-#define CHAD_MATH_IMPL

-#endif //

-#include "3dMath.h"

-//#include "../../include/fixedmath.h"

-typedef unsigned char uchar;

-extern uint R_;

-extern uint G_;

-extern uint B_;

-extern uint A_;

-#ifdef USE_MIXER

-#include<SDL/SDL_mixer.h>

-void ainit(int needsSDLINIT);

-void acleanup();

-typedef Mix_Chunk samp;

-typedef Mix_Music track;

-samp* lwav(const char* t);

-track* lmus(const char* t);

-samp* qlwav(Uint8* w);

-int aplay(samp* samp, int loops);

-void aPos(int chan, int angle, unsigned char dist);

-void aHalt(int chan);

-int mplay(track* mus,int loops, int ms);

-void mhalt();

-#ifdef CHAD_API_IMPL

-void ainit(int needsSDLINIT){

-	if(needsSDLINIT)

-		if (SDL_Init(SDL_INIT_AUDIO)!=0) //We only use SDL for mixer...

-		{

-			fprintf(stderr, "SDL_Init Error: %s\n", SDL_GetError());

-			exit(0);

-		}

-	Mix_Init(MIX_INIT_OGG | MIX_INIT_MP3);

-	if(-1 == Mix_OpenAudio(44100, MIX_DEFAULT_FORMAT, 2, 1024)) {printf("\nAudio can't init :(");exit(2);}

-}

-void acleanup(){

-	Mix_CloseAudio();

-	Mix_Quit();

-	SDL_Quit();

-}

-void mhalt(){Mix_HaltMusic();}

-void aHalt(int chan){Mix_HaltChannel(chan);}

-samp* lwav(const char* t){return Mix_LoadWAV(t);}

-track* lmus(const char* t){return Mix_LoadMUS(t);}

-samp* qlwav(Uint8* w){return Mix_QuickLoad_WAV(w);}

-int aplay(samp* samp, int loops){return Mix_PlayChannel(-1, samp, loops);}

-void aPos(int chan, int angle, unsigned char dist){Mix_SetPosition(chan,angle,dist);}

-int mplay(track* mus,int loops, int ms){return Mix_FadeInMusic(mus,loops,ms);}

-//end of chad api impl

-#endif

-//end of USE_MIXER

-#endif

-#define MAX(x,y) (x>y?x:y)

-#define MIN(x,y) (x<y?x:y)

-#define CHAD_API_NEAR 0.0

--- a/SDL_Examples/include/chadphys.h

+++ /dev/null

@@ -1,106 +1,0 @@

-#ifndef CHAD_PHYS_H

-#define CHAD_PHYS_H

-#ifdef CHAD_PHYS_IMPL

-#define CHAD_MATH_IMPL

-#endif

-#include "3dMath.h"

-typedef struct {

-	aabb shape; //c.d[3] is sphere radius.

-		//if it's zero or less, it's not a sphere, it's a box

-	f_ mass; //0 means kinematic, or static. Defaults to zero.

-	f_ bounciness; //default 0, put portion of displacement into velocity.

-	f_ airfriction; //default 1, multiplied by velocity every time timestep.

-	f_ friction; //default 0.1

-	vec3 r; //Rotation, Used for rendering only

-	vec3 v; //velocity

-	vec3 a; //Body specific acceleration, combined with gravity

-	void* d; //User defined pointer.

-} phys_body;

-typedef struct{

-	phys_body* abodies; //mass non-zero

-	phys_body* sbodies; //mass zero

-	uint nabodies; //number of abodies

-	uint nsbodies; //number of sbodies

-	vec3 g; //gravity

-	f_ ms; //max speed

-} phys_world;

-void stepPhysWorld(phys_world* world);

-void resolveBodies(phys_body* a, phys_body* b);

-void initPhysWorld(phys_world* world); //inits to NULL

-void initPhysBody(phys_body* body); //inits to defaults specified above.

-#ifdef CHAD_PHYS_IMPL

-//TODO: implement functions

-void initPhysBody(phys_body* body){

-	body->shape = (aabb){

-		.c=(vec4){.d[0] = 0,.d[1] = 0,.d[2] = 0,.d[3] = 0},

-		.e=(vec3){.d[0] = 0,.d[1] = 0,.d[2] = 0}

-	};

-	body->mass = 0;

-	body->bounciness = 0;

-	body->friction = 0.1;

-	body->airfriction = 1.0;

-	body->a = (vec3){.d[0] = 0,.d[1] = 0,.d[2] = 0};

-	body->r = (vec3){.d[0] = 0,.d[1] = 0,.d[2] = 0};

-	body->d = NULL;

-}

-//Check for and, if necessary, resolve colliding bodies.

-void resolveBodies(phys_body* a, phys_body* b){

-	if(a->mass <= 0 && b->mass <= 0) return; //Perform a preliminary check. Do we even have to do anything?

-	//Check if the two bodies are colliding.

-	vec4 penvec = (vec4){

-		.d[0]=0,

-		.d[1]=0,

-		.d[2]=0,

-		.d[3]=0

-	};

-	if(a->shape.c.d[3] > 0 && b->shape.c.d[3] > 0) //Both Spheres!

-	{

-		penvec = spherevsphere(a->shape.c, b->shape.c);

-	} else if(a->shape.c.d[3] <= 0 && b->shape.c.d[3] <= 0) //Both boxes!

-	{

-		penvec = boxvbox(a->shape,b->shape);

-	} else if (a->shape.c.d[3] > 0 && b->shape.c.d[3] <= 0) //a is a sphere, b is a box

-	{

-		penvec = spherevaabb(a->shape.c,b->shape);

-	} else if (a->shape.c.d[3] <= 0 && b->shape.c.d[3] > 0){ //a is a box, b is a sphere

-		penvec = spherevaabb(b->shape.c,a->shape);

-		penvec.d[0] *= -1;

-		penvec.d[1] *= -1;

-		penvec.d[2] *= -1;

-	} else {

-		puts("\nInvalid configuration. Error.\n");

-	}

-	if(penvec.d[3] <= 0) return; //No penetration detected, or invalid configuration.

-	//We now have the penetration vector. There is a penetration.

-	//determine how much each should be displaced by.

-	//The penvec points INTO A and is of length penvec.d[3]

-	float bdisplacefactor = a->mass / (a->mass + b->mass), adisplacefactor = b->mass / (a->mass + b->mass);

-	if(!(a->mass > 0)) {adisplacefactor = 0; bdisplacefactor = 1;}

-	if(!(b->mass > 0)) {bdisplacefactor = 0; adisplacefactor = 1;}

-	vec3 avel = a->v;

-	vec3 bvel = b->v;

-	vec3 arelvel = subv3(a->v, b->v);

-	vec3 brelvel = subv3(b->v, a->v);

-	if(a->mass > 0){

-		vec4 displacea = scalev4(-adisplacefactor, penvec); //Note: SSE will accelerate a 4-lane multiply better than 3.

-		a->shape.c.d[0] += displacea.d[0];

-		a->shape.c.d[1] += displacea.d[1];

-		a->shape.c.d[2] += displacea.d[2];

-		a->v = addv3(scalev3(1.0-a->friction, arelvel),bvel); //Apply friction!

-		a->v = addv3(a->v, scalev3( a->bounciness, downv4(displacea) ) );

-	}

-	if(b->mass > 0){

-		vec4 displaceb = scalev4(bdisplacefactor, penvec); //The vector returned by collision functions points INTO B!

-		b->shape.c.d[0] += displaceb.d[0];

-		b->shape.c.d[1] += displaceb.d[1];

-		b->shape.c.d[2] += displaceb.d[2];

-		b->v = addv3(scalev3(1.0 - b->friction, brelvel),avel);

-		b->v = addv3(b->v, scalev3( b->bounciness, downv4(displaceb) ) );

-	}

-}

-#endif

-#endif

--- a/SDL_Examples/include/lockstepthread.h

+++ /dev/null

@@ -1,134 +1,0 @@

-/* Public Domain / CC0 3d Lock-Step Threading Implementation

-Written by Gek (DMHSW) in 2020

-*/

-#ifndef LOCKSTEPTHREAD_H

-#define LOCKSTEPTHREAD_H

-#include <pthread.h>

-#include <stdlib.h>

-typedef struct {

-	pthread_mutex_t myMutex;

-	pthread_barrier_t myBarrier;

-	pthread_t myThread;

-	int isThreadLive;

-	int shouldKillThread;

-	int state;

-	void (*execute)(void*);

-	void* argument;

-} lsthread;

-void init_lsthread(lsthread* t);

-void start_lsthread(lsthread* t);

-void kill_lsthread(lsthread* t);

-void destroy_lsthread(lsthread* t);

-void lock(lsthread* t);

-void step(lsthread* t);

-void* lsthread_func(void* me_void);

-#ifdef LOCKSTEPTHREAD_IMPL

-//function declarations

-void init_lsthread(lsthread* t){

-	t->myMutex = (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER;

-	pthread_barrier_init(&t->myBarrier, NULL, 2);

-	t->isThreadLive = 0;

-	t->shouldKillThread = 0;

-	t->state = 0;

-	t->execute = NULL;

-	t->argument = NULL;

-}

-void destroy_lsthread(lsthread* t){

-	pthread_mutex_destroy(&t->myMutex);

-	pthread_barrier_destroy(&t->myBarrier);

-}

-void lock(lsthread* t){

-	if(t->state == 1)return;//if already locked, nono

-	if(!t->isThreadLive)return;

-	//exit(1)

-	pthread_barrier_wait(&t->myBarrier);

-	//exit(1)

-	if(pthread_mutex_lock(&t->myMutex))

-		exit(1);

-	t->state = 1;

-	//exit(1)

-}

-void step(lsthread* t){

-	if(t->state == -1)return; //if already stepping, nono

-	if(!t->isThreadLive)return;

-	//exit(1)

-	if(pthread_mutex_unlock(&(t->myMutex)))

-		exit(1);

-	//exit(1)

-	pthread_barrier_wait(&t->myBarrier);

-	t->state = -1;

-	//exit(1)

-}

-void kill_lsthread(lsthread* t){

-	if(!t->isThreadLive)return;

-	//exit(1)

-	if(t->state != 1){

-		lock(t);

-		//exit(1)

-	}

-	t->shouldKillThread = 1;

-	step(t);

-	//exit(1)

-	pthread_join(t->myThread,NULL);

-	//if(pthread_kill(t->myThread)){

-	//	exit(1)

-	//}

-	t->isThreadLive = 0;

-	t->shouldKillThread = 0;

-}

-void* lsthread_func(void* me_void){

-	lsthread* me = (lsthread*) me_void;

-	int ret = 0;

-	if (!me)pthread_exit(NULL);

-	while (1) {

-		//ret = pthread_cond_wait(&(me->myCond), &(me->myMutex));

-		pthread_barrier_wait(&me->myBarrier);

-		//exit(1)

-		pthread_mutex_lock(&me->myMutex);

-		//exit(1)

-		//if(ret)pthread_exit(NULL);

-		if (!(me->shouldKillThread) && me->execute)

-			me->execute(me->argument);

-		else if(me->shouldKillThread){

-			pthread_mutex_unlock(&me->myMutex);

-			//exit(1)

-			//pthread_barrier_wait(&me->myBarrier);

-			//exit(1)

-			pthread_exit(NULL);

-		}

-		//exit(1)

-		pthread_mutex_unlock(&me->myMutex);

-		//exit(1)

-		pthread_barrier_wait(&me->myBarrier);

-		//exit(1)

-	}

-	pthread_exit(NULL);

-}

-void start_lsthread(lsthread* t){

-	if(t->isThreadLive)return;

-	t->isThreadLive = 1;

-	t->shouldKillThread = 0;

-	if(pthread_mutex_lock(&t->myMutex))

-		exit(1);

-	t->state = 1; //LOCKED

-	pthread_create(

-		&t->myThread,

-		NULL,

-		lsthread_func,

-		(void*)t

-	);

-}

-#endif

-//end of implementation

-#endif

-//end of header

--- a/SDL_Examples/include/openimgui.h

+++ /dev/null

@@ -1,247 +1,0 @@

-#include <math.h>

-//PROTOTYPE FOR THE OPENIMGUISTANDARD PROPOSAL

-//Licensed to you under the CC0 license.

-//This is the standard for an intuitive immediate-mode gui specification which gracefully solves many of the shortcomings of

-//other immediate mode gui standards.

-//1) How elements are drawn across different environments

-//2) How keyboard/gamepad cursor navigation is handled

-//3) How the same GUI rendering code can be transported between backends.

-//This is a standard for immediate mode GUI elements which can be implemented anywhere and gracefully decreases in feature level based on platform.

-//If your target platform can render text and it can render boxes, then it can run openimgui.

-// The screen's top left corner is 0,0 and bottom right is 1,1

-// All coordinates and dimensions are specified relative to that.

-//HOW CURSOR BUTTON IS HANDLED~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-//Beginning of your frame...

-//omg_cb = 0;

-//if(just_touched || just_mouseleftbuttondown || just_button_down) omg_cb = 1; //Pressed!

-//if(just_released_touch || just_mouseleftbutton up || just_button_up) omg_cb = 2; //Released!

-//Gui code this frame...

-//HOW CURSOR POSITION IS HANDLED:

-// On platforms with touch or mouse input, the polling of cursor position will occur like this~~~~~~~~~~~~~

-// omg_cursor_has_been_sucked = 0;

-// omg_cursorpos[0] = device_cursorpos.x / (float) screenWidth;

-// omg_cursorpos[1] = device_cursorpos.x / (float) screenHeight;

-// Clamp the cursorpos (if necessary)

-// omg_cursorpos[0] = omg_clampf(omg_cursorpos[0]);

-// omg_cursorpos[1] = omg_clampf(omg_cursorpos[1]);

-// omg_cursorpos_presuck[0] = -1;

-// omg_cursorpos_presuck[1] = -1;

-// On platforms which use buttons to navigate menu elements...~~~~~~~~~~~~~

-// omg_cursor_has_been_sucked = 0;

-// if(buttonleft) omg_cursorpos[0] -= omg_buttonjump[0];

-// if(buttonright) omg_cursorpos[0] += omg_buttonjump[0];

-// if(buttonup) omg_cursorpos[1] -= omg_buttonjump[1];

-// if(buttondown) omg_cursorpos[1] += omg_buttonjump[1];

-// Clamp the cursorpos

-// omg_cursorpos[0] = omg_wrapf(omg_cursorpos[0]);

-// omg_cursorpos[1] = omg_wrapf(omg_cursorpos[1]);

-// omg_cursorpos_presuck[0] = omg_cursorpos[0];

-// omg_cursorpos_presuck[1] = omg_cursorpos[1];

-// HOW BUTTON SUCKING WORKS ~~~~~~~~~~~~~~

-// On platforms without cursor input such as game consoles, there needs to be an ergonomic way to navigate menus.

-// This is achieved by simulating a virtual mouse cursor in the game and "Sucking" it into the closest sucking box.

-// We keep track of the cursorposition every frame as well as the position before an attempt to "suck" it has been made.

-// This allows us to determine (By testing, for every graphical object) whether or not the cursorposition should be "sucked" into

-// the graphical object.

-// Normalized cursor position

-#ifndef OPENIMGUI_IMPL

-extern float omg_cursorpos[2]; //Defaults to zero

-extern float omg_cursorpos_presuck[2]; //Defaults to zero

-extern int omg_cursor_has_been_sucked;

-extern int omg_cursor_was_inside;  //Set

-extern float omg_buttonjump[2]; //Defaults to zero

-// Setting for users using

-extern int bstate_old;

-extern int udlr_old[4];

-// cursor button

-extern int omg_cb; //Set to zero every iteration.

-#else

-float omg_cursorpos[2]; //Defaults to zero

-float omg_cursorpos_presuck[2]; //Defaults to zero

-int omg_cursor_has_been_sucked;

-int omg_cursor_was_inside;  //Set

-float omg_buttonjump[2]; //Defaults to zero

-// Setting for users using

-int bstate_old = 0;

-int udlr_old[4] = {0,0,0,0};

-// cursor button

-int omg_cb; //Set to zero every iteration.

-#endif

-//Used for determining the closest button in sucking mode.

-static inline float omg_sqrlinelength(float x1, float y1, float x2, float y2){

-	return ((x1-x2) * (x1-x2) + (y1-y2) * (y1-y2));

-}

-//Used for clamping cursor position to the screen.

-static inline float omg_clampf(float x){

-	return (x>1.0)?1.0: (x<0.0)?0.0:x;

-}

-//Used for wrapping the cursor position to the screen in button cursor mode.

-static inline float omg_wrapf(float x){

-	float f = fmod(x, 1);

-	if(f<0.0) (f = 1.0 + f);

-	return f;

-}

-static inline void omg_update_keycursor(int _up, int _down, int _left, int _right, int bstate){

-	omg_cursor_was_inside = 0;

-	int up = _up && ! udlr_old[0];

-	int down = _down && ! udlr_old[1];

-	int left = _left && ! udlr_old[2];

-	int right = _right && ! udlr_old[3];

-	udlr_old[0] = _up;

-	udlr_old[1] = _down;

-	udlr_old[2] = _left;

-	udlr_old[3] = _right;

-	omg_cursor_has_been_sucked = 0;

-	omg_cursorpos_presuck[0] = omg_cursorpos[0];

-	omg_cursorpos_presuck[1] = omg_cursorpos[1];

-	if(up)   omg_cursorpos[1] -= omg_buttonjump[1];

-	if(down) omg_cursorpos[1] += omg_buttonjump[1];

-	if(left) omg_cursorpos[0] -= omg_buttonjump[0];

-	if(right)omg_cursorpos[0] += omg_buttonjump[0];

-	//Clamp the cursorpos

-	omg_cursorpos[0] = omg_wrapf(omg_cursorpos[0]);

-	omg_cursorpos[1] = omg_wrapf(omg_cursorpos[1]);

-	omg_cursorpos_presuck[0] = omg_cursorpos[0];

-	omg_cursorpos_presuck[1] = omg_cursorpos[1];

-	//printf("BEGIN! Cx = %f, Cy = %f\n", omg_cursorpos[0], omg_cursorpos[1]);

-	omg_cb = 0;

-	if(bstate && !bstate_old) omg_cb = 1;

-	else if (!bstate && bstate_old) omg_cb = 2;

-	bstate_old = bstate;

-}

-//for mouse cursors and touch input.

-static inline void omg_update_mcursor(float ncx, float ncy, int bstate){

-	omg_cursor_has_been_sucked = 0;

-	omg_cursor_was_inside = 0;

-	omg_cursorpos[0] = ncx;

-	omg_cursorpos[1] = ncy;

-	// Clamp the cursorpos (if necessary)

-	omg_cursorpos[0] = omg_clampf(omg_cursorpos[0]);

-	omg_cursorpos[1] = omg_clampf(omg_cursorpos[1]);

-	omg_cursorpos_presuck[0] = -1;

-	omg_cursorpos_presuck[1] = -1;

-	omg_cb = 0;

-	if(bstate && !bstate_old) omg_cb = 1;

-	else if (!bstate && bstate_old) omg_cb = 2;

-	bstate_old = bstate;

-}

-static inline int omg_boxtest(float x, float y, float xdim, float ydim, float cx, float cy){

-	if((x <= cx) &&

-			(x+xdim >= cx) &&

-			(y <= cy) &&

-			(y+ydim >= cy))

-		return 1;

-	return 0;

-}

-static inline int omg_box_retval(float x, float y, float xdim, float ydim){

-	if(omg_cursorpos_presuck[0] == -1)

-		return omg_boxtest(x,y,xdim,ydim,	omg_cursorpos[0],omg_cursorpos[1]);

-	return omg_boxtest(x,y,xdim,ydim,		omg_cursorpos_presuck[0],omg_cursorpos_presuck[1]);

-}

-static inline void omg_box_suck(float x, float y, float xdim, float ydim, int sucks, float buttonjumpx, float buttonjumpy){

-	 if(omg_cursorpos_presuck[0] != -1 && sucks){ //Do not attempt to suck if this graphical element does not suck or sucking is not enabled.

-		int btest = omg_boxtest(x,y,xdim,ydim, omg_cursorpos_presuck[0], omg_cursorpos_presuck[1]);

-		 if(!omg_cursor_has_been_sucked){

-		 	//We are free to try to suck up the cursor without a check.

-			omg_cursorpos[0] = x + xdim/2.0;

-			omg_cursorpos[1] = y + ydim/2.0;

-			omg_cursor_has_been_sucked = 1;

-		  	omg_buttonjump[0] = buttonjumpx;

-		  	omg_buttonjump[1] = buttonjumpy;

-		  	if(btest) omg_cursor_was_inside = 1;

-		  	//puts("Initial grab...\n");

-		  	//printf("Cx = %f, Cy = %f\n", omg_cursorpos[0], omg_cursorpos[1]);

-		} else if (

-		(!omg_cursor_was_inside && //Cursor was not inside.

-		omg_sqrlinelength(x+xdim/2.0, y+ydim/2.0, 			omg_cursorpos_presuck[0],  omg_cursorpos_presuck[1]) <

-		           omg_sqrlinelength(omg_cursorpos[0], omg_cursorpos[1], omg_cursorpos_presuck[0],  omg_cursorpos_presuck[1])

-		 ) || //Cursor was inside, if it's inside this one as well, pick the closest.

-		 (!omg_cursor_was_inside && btest) ||

-		  (

-		  	btest &&

-			omg_sqrlinelength(x+xdim/2.0, y+ydim/2.0, 			omg_cursorpos_presuck[0],  omg_cursorpos_presuck[1]) <

-		    omg_sqrlinelength(omg_cursorpos[0], omg_cursorpos[1], omg_cursorpos_presuck[0],  omg_cursorpos_presuck[1])

-		  )

-		           ){

-		           //The box is closer than the current suck position.

-			omg_cursorpos[0] = x+xdim/2.0;

-			omg_cursorpos[1] = y+ydim/2.0;

-			omg_cursor_has_been_sucked = 1;

-		  omg_buttonjump[0] = buttonjumpx;

-		  omg_buttonjump[1] = buttonjumpy;

-		  //if(boxtest(x,y,xdim,ydim)) omg_cursor_was_inside = 1;

-		  omg_cursor_was_inside = omg_boxtest(x,y,xdim,ydim, omg_cursorpos_presuck[0], omg_cursorpos_presuck[1]);

-		  //puts("Found a different button!\n");

-		  //printf("Cx = %f, Cy = %f\n", omg_cursorpos[0], omg_cursorpos[1]);

-		}

-	}

-}

-// OMG_BOX:

-// Draws a box on the screen.

-// Returns whether or not the cursor was inside it this frame (NOT IF IT GOT __SUCKED__ INSIDE IT!)

-// x,y are the top left corner.

-// xdim, ydim, are the width and height of the box.

-// hints is a set of implementation-specific parameters describing the nature of how the box is drawn,

-// sucks indicates whether or not the cursor position is "sucked" into the button (See: HOW BUTTON SUCKING WORKS)

-// buttonjumpx and buttonjumpy are the amount by which the cursor will jump in X and Y when pressing the menu navigation arrows.

-// The return value is determined like this:

-// if(omg_cursorpos_presuck[0] == -1) return omg_boxtest(omg_cursorpos) else

-//	return boxtest(omg_cursorpos_presuck)

-// The suck test works like this:

-// if(omg_cursorpos_presuck[0] != -1 && sucks){ //Do not attempt to suck if this graphical element does not suck or sucking is not enabled.

-// if(!omg_cursor_has_been_sucked){ //We are free to try to suck up the cursor without a check.

-//	omg_cursorpos[0] = x+xdim/2.0;

-//	omg_cursorpos[1] = y+ydim/2.0;

-//	omg_cursor_has_been_sucked = 1;

-//  omg_buttonjump[0] = buttonjumpx;

-//  omg_buttonjump[1] = buttonjumpy;

-//} else if (omg_sqrlinelength(x+xdim/2.0, y+ydim/2.0, omg_cursorpos_presuck[0],  omg_cursorpos_presuck[1]) <

-//           omg_sqrlinelength(omg_cursorpos[0], omg_cursorpos[1], omg_cursorpos_presuck[0],  omg_cursorpos_presuck[1])){ //The box is closer than the current suck position.

-//	omg_cursorpos[0] = x+xdim/2.0;

-//	omg_cursorpos[1] = y+ydim/2.0;

-//	omg_cursor_has_been_sucked = 1;

-//  omg_buttonjump[0] = buttonjumpx;

-//  omg_buttonjump[1] = buttonjumpy;

-//}}

-//When sucking is enabled (omg_cursorpos_presuck[0] != -1) the box test will be performed on cursorpos_presuck.

-//You can use the above static inline functions as a reference for your implementation.

-int omg_box(float x, float y, float xdim, float ydim, int sucks, float buttonjumpx, float buttonjumpy, int hints);

-// OMG_TEXTBOX:

-// Draws a box... with text in it

-// All the args are the same, and its return value is the same, except now it can draw text.

-// It should handle all the same hints as omg_box.

-// the hintstext variable should handle all

-// The textsize is an implementation-specific indication of how large the text in the box should be.

-// The x and y dimensions of the box are automatically deduced from text.

-// Text containing newlines will extend the Y dimension of the box,

-// and the longest line of text will determine the x dimension of the box.

-// Otherwise, it is functionally identical to omg_box.

-int omg_textbox(float x, float y, const char* text, int textsize, int sucks, float buttonjumpx, float buttonjumpy, int hints, int hintstext);

--- a/SDL_Examples/include/resweep.h

+++ /dev/null

@@ -1,307 +1,0 @@

-//unlicense'd

-/*

-This is free and unencumbered software released into the public domain.

-Anyone is free to copy, modify, publish, use, compile, sell, or

-distribute this software, either in source code form or as a compiled

-binary, for any purpose, commercial or non-commercial, and by any

-means.

-In jurisdictions that recognize copyright laws, the author or authors

-of this software dedicate any and all copyright interest in the

-software to the public domain. We make this dedication for the benefit

-of the public at large and to the detriment of our heirs and

-successors. We intend this dedication to be an overt act of

-relinquishment in perpetuity of all present and future rights to this

-software under copyright law.

-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,

-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

-IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR

-OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,

-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR

-OTHER DEALINGS IN THE SOFTWARE.

-For more information, please refer to <http://unlicense.org>

-*/

-#pragma once

-#ifdef __cplusplus

-extern "C" {

-#endif

-/******************************************************************************/

-/******************************************************************************/

-#ifdef __cplusplus

-}

-#endif

-#ifdef RESWEEP_IMPLEMENTATION

-#include <math.h>

-#ifndef M_PI

-#define M_PI   3.14159265358979323846

-#endif

-#ifndef M_1_PI

-#define	M_1_PI 0.31830988618379067154

-#endif

-#define SIDELOBE_HEIGHT 96

-#define UP_TRANSITION_WIDTH (1.0 / 32.0)

-#define DOWN_TRANSITION_WIDTH (1.0 / 128.0)

-#define MAX_SINC_WINDOW_SIZE 2048

-#define RESAMPLE_LUT_STEP 128

-typedef struct

-{

-	float value;

-	float delta;

-}

-lutEntry_t;

-lutEntry_t dynamicLut[RESAMPLE_LUT_STEP * MAX_SINC_WINDOW_SIZE];

-static inline unsigned int calc_gcd(unsigned int a, unsigned int b)

-{

-	while (b)

-	{

-		unsigned int t = b;

-		b = a % b;

-		a = t;

-	}

-	return a;

-}

-static inline double exact_nsinc(double x)

-{

-	if (x == 0.0)

-		return 1.0;

-	return ((double)(M_1_PI) / x) * sin(M_PI * x);

-}

-// Modified Bessel function of the first kind, order 0

-// https://ccrma.stanford.edu/~jos/sasp/Kaiser_Window.html

-static inline double I0(double x)

-{

-	double r = 1.0, xx = x * x, xpow = xx, coeff = 0.25;

-	int k;

-	// iterations until coeff ~= 0

-	// 19 for float32, 89 for float64, 880 for float80

-	for (k = 1; k < 89; k++)

-	{

-		r += xpow * coeff;

-		coeff /= (4 * k + 8) * k + 4;

-		xpow *= xx;

-	}

-	return r;

-}

-// https://ccrma.stanford.edu/~jos/sasp/Kaiser_Window.html

-static inline double kaiser(int n, int length, double beta)

-{

-	double mid = 2 * n / (double)(length - 1) - 1.0;

-	return I0(beta * sqrt(1.0 - mid * mid)) / I0(beta);

-}

-static inline void sinc_resample_createLut(int inFreq, int cutoffFreq2, int windowSize, double beta)

-{

-	double windowLut[windowSize];

-	double freqAdjust = (double)cutoffFreq2 / (double)inFreq;

-	lutEntry_t *out, *in;

-	int i, j;

-	for (i = 0; i < windowSize; i++)

-		windowLut[i] = kaiser(i, windowSize, beta);

-	out = dynamicLut;

-	for (i = 0; i < RESAMPLE_LUT_STEP; i++)

-	{

-		double offset = i / (double)(RESAMPLE_LUT_STEP - 1) - windowSize / 2;

-		double sum = 0.0;

-		for (j = 0; j < windowSize; j++)

-		{

-			double s = exact_nsinc((j + offset) * freqAdjust);

-			out->value = s * windowLut[j];

-			sum += s;

-			out++;

-		}

-		out -= windowSize;

-		for (j = 0; j < windowSize; j++)

-		{

-			out->value /= sum;

-			out++;

-		}

-	}

-	out = dynamicLut;

-	in = out + windowSize;

-	for (i = 0; i < RESAMPLE_LUT_STEP - 1; i++)

-	{

-		for (j = 0; j < windowSize; j++)

-		{

-			out->delta = in->value - out->value;

-			out++;

-			in++;

-		}

-	}

-	for (j = 0; j < windowSize; j++)

-	{

-		out->delta = 0;

-		out++;

-	}

-}

-static inline void sinc_resample_internal(short *wavOut, int sizeOut, int outFreq, const short *wavIn, int sizeIn, int inFreq, int cutoffFreq2, int numChannels, int windowSize, double beta)

-{

-	float y[windowSize * numChannels];

-	const short *sampleIn, *wavInEnd = wavIn + (sizeIn / 2);

-	short *sampleOut, *wavOutEnd = wavOut + (sizeOut / 2);

-	float outPeriod;

-	int subpos = 0;

-	int gcd = calc_gcd(inFreq, outFreq);

-	int i, c, next;

-	float dither[numChannels];

-	sinc_resample_createLut(inFreq, cutoffFreq2, windowSize, beta);

-	inFreq /= gcd;

-	outFreq /= gcd;

-	outPeriod = 1.0f / outFreq;

-	for (c = 0; c < numChannels; c++)

-		dither[c] = 0.0f;

-	for (i = 0; i < windowSize / 2 - 1; i++)

-	{

-		for (c = 0; c < numChannels; c++)

-			y[i * numChannels + c] = 0;

-	}

-	sampleIn = wavIn;

-	for (; i < windowSize; i++)

-	{

-		for (c = 0; c < numChannels; c++)

-			y[i * numChannels + c] = (sampleIn < wavInEnd) ? *sampleIn++ : 0;

-	}

-	sampleOut = wavOut;

-	next = 0;

-	while (sampleOut < wavOutEnd)

-	{

-		float samples[numChannels];

-		float offset = 1.0f - subpos * outPeriod;

-		float interp;

-		lutEntry_t *lutPart;

-		int index;

-		for (c = 0; c < numChannels; c++)

-			samples[c] = 0.0f;

-		interp = offset * (RESAMPLE_LUT_STEP - 1);

-		index = interp;

-		interp -= index;

-		lutPart = dynamicLut + index * windowSize;

-		for (i = next; i < windowSize; i++, lutPart++)

-		{

-			float scale = lutPart->value + lutPart->delta * interp;

-			for (c = 0; c < numChannels; c++)

-				samples[c] += y[i * numChannels + c] * scale;

-		}

-		for (i = 0; i < next; i++, lutPart++)

-		{

-			float scale = lutPart->value + lutPart->delta * interp;

-			for (c = 0; c < numChannels; c++)

-				samples[c] += y[i * numChannels + c] * scale;

-		}

-		for (c = 0; c < numChannels; c++)

-		{

-			float r = roundf(samples[c] + dither[c]);

-			dither[c] += samples[c] - r;

-			if (r > 32767)

-				*sampleOut++ = 32767;

-			else if (r < -32768)

-				*sampleOut++ = -32768;

-			else

-				*sampleOut++ = r;

-		}

-		subpos += inFreq;

-		while (subpos >= outFreq)

-		{

-			subpos -= outFreq;

-			for (c = 0; c < numChannels; c++)

-				y[next * numChannels + c] = (sampleIn < wavInEnd) ? *sampleIn++ : 0;

-			next = (next + 1) % windowSize;

-		}

-	}

-}

-void sinc_resample(short *wavOut, int sizeOut, int outFreq, const short *wavIn, int sizeIn, int inFreq, int numChannels)

-{

-	double sidelobeHeight = SIDELOBE_HEIGHT;

-	double transitionWidth;

-	double beta = 0.0;

-	int cutoffFreq2;

-	int windowSize;

-	// Just copy if no resampling necessary

-	if (outFreq == inFreq)

-	{

-		memcpy(wavOut, wavIn, (sizeOut < sizeIn) ? sizeOut : sizeIn);

-		return;

-	}

-	transitionWidth = (outFreq > inFreq) ? UP_TRANSITION_WIDTH : DOWN_TRANSITION_WIDTH;

-	// cutoff freq is ideally half transition width away from output freq

-	cutoffFreq2 = outFreq - transitionWidth * inFreq * 0.5;

-	// FIXME: Figure out why there are bad effects with cutoffFreq2 > inFreq

-	if (cutoffFreq2 > inFreq)

-		cutoffFreq2 = inFreq;

-	// https://www.mathworks.com/help/signal/ug/kaiser-window.html

-	if (sidelobeHeight > 50)

-		beta = 0.1102 * (sidelobeHeight - 8.7);

-	else if (sidelobeHeight >= 21)

-		beta = 0.5842 * pow(sidelobeHeight - 21.0, 0.4) + 0.07886 * (sidelobeHeight - 21.0);

-	windowSize = (sidelobeHeight - 8.0) / (2.285 * transitionWidth * M_PI) + 1;

-	if (windowSize > MAX_SINC_WINDOW_SIZE)

-		windowSize = MAX_SINC_WINDOW_SIZE;

-	// should compile as different paths

-	// number of channels need to be compiled as separate paths to ensure good

-	// vectorization by the compiler

-	if (numChannels == 1)

-		sinc_resample_internal(wavOut, sizeOut, outFreq, wavIn, sizeIn, inFreq, cutoffFreq2, 1, windowSize, beta);

-	else if (numChannels == 2)

-		sinc_resample_internal(wavOut, sizeOut, outFreq, wavIn, sizeIn, inFreq, cutoffFreq2, 2, windowSize, beta);

-	else

-		sinc_resample_internal(wavOut, sizeOut, outFreq, wavIn, sizeIn, inFreq, cutoffFreq2, numChannels, windowSize, beta);

-}

-#endif // RESWEEP_IMPLEMENTATION

--- a/SDL_Examples/include/stb_ds.h

+++ /dev/null

@@ -1,1880 +1,0 @@

-/* stb_ds.h - v0.65 - public domain data structures - Sean Barrett 2019

-   This is a single-header-file library that provides easy-to-use

-   dynamic arrays and hash tables for C (also works in C++).

-   For a gentle introduction:

-      http://nothings.org/stb_ds

-   To use this library, do this in *one* C or C++ file:

-      #define STB_DS_IMPLEMENTATION

-      #include "stb_ds.h"

-TABLE OF CONTENTS

-  Table of Contents

-  Compile-time options

-  License

-  Documentation

-  Notes

-  Notes - Dynamic arrays

-  Notes - Hash maps

-  Credits

-COMPILE-TIME OPTIONS

-  #define STBDS_NO_SHORT_NAMES

-     This flag needs to be set globally.

-     By default stb_ds exposes shorter function names that are not qualified

-     with the "stbds_" prefix. If these names conflict with the names in your

-     code, define this flag.

-  #define STBDS_SIPHASH_2_4

-     This flag only needs to be set in the file containing #define STB_DS_IMPLEMENTATION.

-     By default stb_ds.h hashes using a weaker variant of SipHash and a custom hash for

-     4- and 8-byte keys. On 64-bit platforms, you can define the above flag to force

-     stb_ds.h to use specification-compliant SipHash-2-4 for all keys. Doing so makes

-     hash table insertion about 20% slower on 4- and 8-byte keys, 5% slower on

-     64-byte keys, and 10% slower on 256-byte keys on my test computer.

-  #define STBDS_REALLOC(context,ptr,size) better_realloc

-  #define STBDS_FREE(context,ptr)         better_free

-     These defines only need to be set in the file containing #define STB_DS_IMPLEMENTATION.

-     By default stb_ds uses stdlib realloc() and free() for memory management. You can

-     substitute your own functions instead by defining these symbols. You must either

-     define both, or neither. Note that at the moment, 'context' will always be NULL.

-     @TODO add an array/hash initialization function that takes a memory context pointer.

-  #define STBDS_UNIT_TESTS

-     Defines a function stbds_unit_tests() that checks the functioning of the data structures.

-  Note that on older versions of gcc (e.g. 5.x.x) you may need to build with '-std=c++0x'

-     (or equivalentally '-std=c++11') when using anonymous structures as seen on the web

-     page or in STBDS_UNIT_TESTS.

-LICENSE

-  Placed in the public domain and also MIT licensed.

-  See end of file for detailed license information.

-DOCUMENTATION

-  Dynamic Arrays

-    Non-function interface:

-      Declare an empty dynamic array of type T

-        T* foo = NULL;

-      Access the i'th item of a dynamic array 'foo' of type T, T* foo:

-        foo[i]

-    Functions (actually macros)

-      arrfree:

-        void arrfree(T*);

-          Frees the array.

-      arrlen:

-        ptrdiff_t arrlen(T*);

-          Returns the number of elements in the array.

-      arrlenu:

-        size_t arrlenu(T*);

-          Returns the number of elements in the array as an unsigned type.

-      arrpop:

-        T arrpop(T* a)

-          Removes the final element of the array and returns it.

-      arrput:

-        T arrput(T* a, T b);

-          Appends the item b to the end of array a. Returns b.

-      arrins:

-        T arrins(T* a, int p, T b);

-          Inserts the item b into the middle of array a, into a[p],

-          moving the rest of the array over. Returns b.

-      arrinsn:

-        void arrins(T* a, int p, int n);

-          Inserts n uninitialized items into array a starting at a[p],

-          moving the rest of the array over.

-      arraddnptr:

-        T* arraddnptr(T* a, int n)

-          Appends n uninitialized items onto array at the end.

-          Returns a pointer to the first uninitialized item added.

-      arraddnindex:

-        size_t arraddnindex(T* a, int n)

-          Appends n uninitialized items onto array at the end.

-          Returns the index of the first uninitialized item added.

-      arrdel:

-        void arrdel(T* a, int p);

-          Deletes the element at a[p], moving the rest of the array over.

-      arrdeln:

-        void arrdel(T* a, int p, int n);

-          Deletes n elements starting at a[p], moving the rest of the array over.

-      arrdelswap:

-        void arrdelswap(T* a, int p);

-          Deletes the element at a[p], replacing it with the element from

-          the end of the array. O(1) performance.

-      arrsetlen:

-        void arrsetlen(T* a, int n);

-          Changes the length of the array to n. Allocates uninitialized

-          slots at the end if necessary.

-      arrsetcap:

-        size_t arrsetcap(T* a, int n);

-          Sets the length of allocated storage to at least n. It will not

-          change the length of the array.

-      arrcap:

-        size_t arrcap(T* a);

-          Returns the number of total elements the array can contain without

-          needing to be reallocated.

-  Hash maps & String hash maps

-    Given T is a structure type: struct { TK key; TV value; }. Note that some

-    functions do not require TV value and can have other fields. For string

-    hash maps, TK must be 'char *'.

-    Special interface:

-      stbds_rand_seed:

-        void stbds_rand_seed(size_t seed);

-          For security against adversarially chosen data, you should seed the

-          library with a strong random number. Or at least seed it with time().

-      stbds_hash_string:

-        size_t stbds_hash_string(char *str, size_t seed);

-          Returns a hash value for a string.

-      stbds_hash_bytes:

-        size_t stbds_hash_bytes(void *p, size_t len, size_t seed);

-          These functions hash an arbitrary number of bytes. The function

-          uses a custom hash for 4- and 8-byte data, and a weakened version

-          of SipHash for everything else. On 64-bit platforms you can get

-          specification-compliant SipHash-2-4 on all data by defining

-          STBDS_SIPHASH_2_4, at a significant cost in speed.

-    Non-function interface:

-      Declare an empty hash map of type T

-        T* foo = NULL;

-      Access the i'th entry in a hash table T* foo:

-        foo[i]

-    Function interface (actually macros):

-      hmfree

-      shfree

-        void hmfree(T*);

-        void shfree(T*);

-          Frees the hashmap and sets the pointer to NULL.

-      hmlen

-      shlen

-        ptrdiff_t hmlen(T*)

-        ptrdiff_t shlen(T*)

-          Returns the number of elements in the hashmap.

-      hmlenu

-      shlenu

-        size_t hmlenu(T*)

-        size_t shlenu(T*)

-          Returns the number of elements in the hashmap.

-      hmgeti

-      shgeti

-      hmgeti_ts

-        ptrdiff_t hmgeti(T*, TK key)

-        ptrdiff_t shgeti(T*, char* key)

-        ptrdiff_t hmgeti_ts(T*, TK key, ptrdiff_t tempvar)

-          Returns the index in the hashmap which has the key 'key', or -1

-          if the key is not present.

-      hmget

-      hmget_ts

-      shget

-        TV hmget(T*, TK key)

-        TV shget(T*, char* key)

-        TV hmget_ts(T*, TK key, ptrdiff_t tempvar)

-          Returns the value corresponding to 'key' in the hashmap.

-          The structure must have a 'value' field

-      hmgets

-      shgets

-        T hmgets(T*, TK key)

-        T shgets(T*, char* key)

-          Returns the structure corresponding to 'key' in the hashmap.

-      hmgetp

-      shgetp

-      hmgetp_ts

-      hmgetp_null

-      shgetp_null

-        T* hmgetp(T*, TK key)

-        T* shgetp(T*, char* key)

-        T* hmgetp_ts(T*, TK key, ptrdiff_t tempvar)

-        T* hmgetp_null(T*, TK key)

-        T* shgetp_null(T*, char *key)

-          Returns a pointer to the structure corresponding to 'key' in

-          the hashmap. Functions ending in "_null" return NULL if the key

-          is not present in the hashmap; the others return a pointer to a

-          structure holding the default value (but not the searched-for key).

-      hmdefault

-      shdefault

-        TV hmdefault(T*, TV value)

-        TV shdefault(T*, TV value)

-          Sets the default value for the hashmap, the value which will be

-          returned by hmget/shget if the key is not present.

-      hmdefaults

-      shdefaults

-        TV hmdefaults(T*, T item)

-        TV shdefaults(T*, T item)

-          Sets the default struct for the hashmap, the contents which will be

-          returned by hmgets/shgets if the key is not present.

-      hmput

-      shput

-        TV hmput(T*, TK key, TV value)

-        TV shput(T*, char* key, TV value)

-          Inserts a <key,value> pair into the hashmap. If the key is already

-          present in the hashmap, updates its value.

-      hmputs

-      shputs

-        T hmputs(T*, T item)

-        T shputs(T*, T item)

-          Inserts a struct with T.key into the hashmap. If the struct is already

-          present in the hashmap, updates it.

-      hmdel

-      shdel

-        int hmdel(T*, TK key)

-        int shdel(T*, char* key)

-          If 'key' is in the hashmap, deletes its entry and returns 1.

-          Otherwise returns 0.

-    Function interface (actually macros) for strings only:

-      sh_new_strdup

-        void sh_new_strdup(T*);

-          Overwrites the existing pointer with a newly allocated

-          string hashmap which will automatically allocate and free

-          each string key using realloc/free

-      sh_new_arena

-        void sh_new_arena(T*);

-          Overwrites the existing pointer with a newly allocated

-          string hashmap which will automatically allocate each string

-          key to a string arena. Every string key ever used by this

-          hash table remains in the arena until the arena is freed.

-          Additionally, any key which is deleted and reinserted will

-          be allocated multiple times in the string arena.

-NOTES

-  * These data structures are realloc'd when they grow, and the macro

-    "functions" write to the provided pointer. This means: (a) the pointer

-    must be an lvalue, and (b) the pointer to the data structure is not

-    stable, and you must maintain it the same as you would a realloc'd

-    pointer. For example, if you pass a pointer to a dynamic array to a

-    function which updates it, the function must return back the new

-    pointer to the caller. This is the price of trying to do this in C.

-  * The following are the only functions that are thread-safe on a single data

-    structure, i.e. can be run in multiple threads simultaneously on the same

-    data structure

-        hmlen        shlen

-        hmlenu       shlenu

-        hmget_ts     shget_ts

-        hmgeti_ts    shgeti_ts

-        hmgets_ts    shgets_ts

-  * You iterate over the contents of a dynamic array and a hashmap in exactly

-    the same way, using arrlen/hmlen/shlen:

-      for (i=0; i < arrlen(foo); ++i)

-         ... foo[i] ...

-  * All operations except arrins/arrdel are O(1) amortized, but individual

-    operations can be slow, so these data structures may not be suitable

-    for real time use. Dynamic arrays double in capacity as needed, so

-    elements are copied an average of once. Hash tables double/halve

-    their size as needed, with appropriate hysteresis to maintain O(1)

-    performance.

-NOTES - DYNAMIC ARRAY

-  * If you know how long a dynamic array is going to be in advance, you can avoid

-    extra memory allocations by using arrsetlen to allocate it to that length in

-    advance and use foo[n] while filling it out, or arrsetcap to allocate the memory

-    for that length and use arrput/arrpush as normal.

-  * Unlike some other versions of the dynamic array, this version should

-    be safe to use with strict-aliasing optimizations.

-NOTES - HASH MAP

-  * For compilers other than GCC and clang (e.g. Visual Studio), for hmput/hmget/hmdel

-    and variants, the key must be an lvalue (so the macro can take the address of it).

-    Extensions are used that eliminate this requirement if you're using C99 and later

-    in GCC or clang, or if you're using C++ in GCC. But note that this can make your

-    code less portable.

-  * To test for presence of a key in a hashmap, just do 'hmgeti(foo,key) >= 0'.

-  * The iteration order of your data in the hashmap is determined solely by the

-    order of insertions and deletions. In particular, if you never delete, new

-    keys are always added at the end of the array. This will be consistent

-    across all platforms and versions of the library. However, you should not

-    attempt to serialize the internal hash table, as the hash is not consistent

-    between different platforms, and may change with future versions of the library.

-  * Use sh_new_arena() for string hashmaps that you never delete from. Initialize

-    with NULL if you're managing the memory for your strings, or your strings are

-    never freed (at least until the hashmap is freed). Otherwise, use sh_new_strdup().

-    @TODO: make an arena variant that garbage collects the strings with a trivial

-    copy collector into a new arena whenever the table shrinks / rebuilds. Since

-    current arena recommendation is to only use arena if it never deletes, then

-    this can just replace current arena implementation.

-  * If adversarial input is a serious concern and you're on a 64-bit platform,

-    enable STBDS_SIPHASH_2_4 (see the 'Compile-time options' section), and pass

-    a strong random number to stbds_rand_seed.

-  * The default value for the hash table is stored in foo[-1], so if you

-    use code like 'hmget(T,k)->value = 5' you can accidentally overwrite

-    the value stored by hmdefault if 'k' is not present.

-CREDITS

-  Sean Barrett -- library, idea for dynamic array API/implementation

-  Per Vognsen  -- idea for hash table API/implementation

-  Rafael Sachetto -- arrpop()

-  github:HeroicKatora -- arraddn() reworking

-  Bugfixes:

-    Andy Durdin

-    Shane Liesegang

-    Vinh Truong

-    Andreas Molzer

-    github:hashitaku

-    github:srdjanstipic

-*/

-#ifdef STBDS_UNIT_TESTS

-#define _CRT_SECURE_NO_WARNINGS

-#endif

-#ifndef INCLUDE_STB_DS_H

-#define INCLUDE_STB_DS_H

-#include <stddef.h>

-#include <string.h>

-#ifndef STBDS_NO_SHORT_NAMES

-#define arrlen      stbds_arrlen

-#define arrlenu     stbds_arrlenu

-#define arrput      stbds_arrput

-#define arrpush     stbds_arrput

-#define arrpop      stbds_arrpop

-#define arrfree     stbds_arrfree

-#define arraddn     stbds_arraddn // deprecated, use one of the following instead:

-#define arraddnptr  stbds_arraddnptr

-#define arraddnindex stbds_arraddnindex

-#define arrsetlen   stbds_arrsetlen

-#define arrlast     stbds_arrlast

-#define arrins      stbds_arrins

-#define arrinsn     stbds_arrinsn

-#define arrdel      stbds_arrdel

-#define arrdeln     stbds_arrdeln

-#define arrdelswap  stbds_arrdelswap

-#define arrcap      stbds_arrcap

-#define arrsetcap   stbds_arrsetcap

-#define hmput       stbds_hmput

-#define hmputs      stbds_hmputs

-#define hmget       stbds_hmget

-#define hmget_ts    stbds_hmget_ts

-#define hmgets      stbds_hmgets

-#define hmgetp      stbds_hmgetp

-#define hmgetp_ts   stbds_hmgetp_ts

-#define hmgetp_null stbds_hmgetp_null

-#define hmgeti      stbds_hmgeti

-#define hmgeti_ts   stbds_hmgeti_ts

-#define hmdel       stbds_hmdel

-#define hmlen       stbds_hmlen

-#define hmlenu      stbds_hmlenu

-#define hmfree      stbds_hmfree

-#define hmdefault   stbds_hmdefault

-#define hmdefaults  stbds_hmdefaults

-#define shput       stbds_shput

-#define shputi      stbds_shputi

-#define shputs      stbds_shputs

-#define shget       stbds_shget

-#define shgeti      stbds_shgeti

-#define shgets      stbds_shgets

-#define shgetp      stbds_shgetp

-#define shgetp_null stbds_shgetp_null

-#define shdel       stbds_shdel

-#define shlen       stbds_shlen

-#define shlenu      stbds_shlenu

-#define shfree      stbds_shfree

-#define shdefault   stbds_shdefault

-#define shdefaults  stbds_shdefaults

-#define sh_new_arena  stbds_sh_new_arena

-#define sh_new_strdup stbds_sh_new_strdup

-#define stralloc    stbds_stralloc

-#define strreset    stbds_strreset

-#endif

-#if defined(STBDS_REALLOC) && !defined(STBDS_FREE) || !defined(STBDS_REALLOC) && defined(STBDS_FREE)

-#error "You must define both STBDS_REALLOC and STBDS_FREE, or neither."

-#endif

-#if !defined(STBDS_REALLOC) && !defined(STBDS_FREE)

-#include <stdlib.h>

-#define STBDS_REALLOC(c,p,s) realloc(p,s)

-#define STBDS_FREE(c,p)      free(p)

-#endif

-#ifdef _MSC_VER

-#define STBDS_NOTUSED(v)  (void)(v)

-#else

-#define STBDS_NOTUSED(v)  (void)sizeof(v)

-#endif

-#ifdef __cplusplus

-extern "C" {

-#endif

-// for security against attackers, seed the library with a random number, at least time() but stronger is better

-extern void stbds_rand_seed(size_t seed);

-// these are the hash functions used internally if you want to test them or use them for other purposes

-extern size_t stbds_hash_bytes(void *p, size_t len, size_t seed);

-extern size_t stbds_hash_string(char *str, size_t seed);

-// this is a simple string arena allocator, initialize with e.g. 'stbds_string_arena my_arena={0}'.

-typedef struct stbds_string_arena stbds_string_arena;

-extern char * stbds_stralloc(stbds_string_arena *a, char *str);

-extern void   stbds_strreset(stbds_string_arena *a);

-// have to #define STBDS_UNIT_TESTS to call this

-extern void stbds_unit_tests(void);

-///////////////

-//

-// Everything below here is implementation details

-//

-extern void * stbds_arrgrowf(void *a, size_t elemsize, size_t addlen, size_t min_cap);

-extern void   stbds_hmfree_func(void *p, size_t elemsize);

-extern void * stbds_hmget_key(void *a, size_t elemsize, void *key, size_t keysize, int mode);

-extern void * stbds_hmget_key_ts(void *a, size_t elemsize, void *key, size_t keysize, ptrdiff_t *temp, int mode);

-extern void * stbds_hmput_default(void *a, size_t elemsize);

-extern void * stbds_hmput_key(void *a, size_t elemsize, void *key, size_t keysize, int mode);

-extern void * stbds_hmdel_key(void *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode);

-extern void * stbds_shmode_func(size_t elemsize, int mode);

-#ifdef __cplusplus

-}

-#endif

-#if defined(__GNUC__) || defined(__clang__)

-#define STBDS_HAS_TYPEOF

-#ifdef __cplusplus

-//#define STBDS_HAS_LITERAL_ARRAY  // this is currently broken for clang

-#endif

-#endif

-#if !defined(__cplusplus)

-#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L

-#define STBDS_HAS_LITERAL_ARRAY

-#endif

-#endif

-// this macro takes the address of the argument, but on gcc/clang can accept rvalues

-#if defined(STBDS_HAS_LITERAL_ARRAY) && defined(STBDS_HAS_TYPEOF)

-  #if __clang__

-  #define STBDS_ADDRESSOF(typevar, value)     ((__typeof__(typevar)[1]){value}) // literal array decays to pointer to value

-  #else

-  #define STBDS_ADDRESSOF(typevar, value)     ((typeof(typevar)[1]){value}) // literal array decays to pointer to value

-  #endif

-#else

-#define STBDS_ADDRESSOF(typevar, value)     &(value)

-#endif

-#define STBDS_OFFSETOF(var,field)           ((char *) &(var)->field - (char *) (var))

-#define stbds_header(t)  ((stbds_array_header *) (t) - 1)

-#define stbds_temp(t)    stbds_header(t)->temp

-#define stbds_temp_key(t) (*(char **) stbds_header(t)->hash_table)

-#define stbds_arrsetcap(a,n)  (stbds_arrgrow(a,0,n))

-#define stbds_arrsetlen(a,n)  ((stbds_arrcap(a) < (size_t) (n) ? stbds_arrsetcap((a),(size_t)(n)),0 : 0), (a) ? stbds_header(a)->length = (size_t) (n) : 0)

-#define stbds_arrcap(a)       ((a) ? stbds_header(a)->capacity : 0)

-#define stbds_arrlen(a)       ((a) ? (ptrdiff_t) stbds_header(a)->length : 0)

-#define stbds_arrlenu(a)      ((a) ?             stbds_header(a)->length : 0)

-#define stbds_arrput(a,v)     (stbds_arrmaybegrow(a,1), (a)[stbds_header(a)->length++] = (v))

-#define stbds_arrpush         stbds_arrput  // synonym

-#define stbds_arrpop(a)       (stbds_header(a)->length--, (a)[stbds_header(a)->length])

-#define stbds_arraddn(a,n)    ((void)(stbds_arraddnoff(a, n)))    // deprecated, use one of the following instead:

-#define stbds_arraddnptr(a,n) (stbds_arrmaybegrow(a,n), stbds_header(a)->length += (n), &(a)[stbds_header(a)->length-(n)])

-#define stbds_arraddnoff(a,n) (stbds_arrmaybegrow(a,n), stbds_header(a)->length += (n), stbds_header(a)->length-(n))

-#define stbds_arrlast(a)      ((a)[stbds_header(a)->length-1])

-#define stbds_arrfree(a)      ((void) ((a) ? STBDS_FREE(NULL,stbds_header(a)) : (void)0), (a)=NULL)

-#define stbds_arrdel(a,i)     stbds_arrdeln(a,i,1)

-#define stbds_arrdeln(a,i,n)  (memmove(&(a)[i], &(a)[(i)+(n)], sizeof *(a) * (stbds_header(a)->length-(n)-(i))), stbds_header(a)->length -= (n))

-#define stbds_arrdelswap(a,i) ((a)[i] = stbds_arrlast(a), stbds_header(a)->length -= 1)

-#define stbds_arrinsn(a,i,n)  (stbds_arraddn((a),(n)), memmove(&(a)[(i)+(n)], &(a)[i], sizeof *(a) * (stbds_header(a)->length-(n)-(i))))

-#define stbds_arrins(a,i,v)   (stbds_arrinsn((a),(i),1), (a)[i]=(v))

-#define stbds_arrmaybegrow(a,n)  ((!(a) || stbds_header(a)->length + (n) > stbds_header(a)->capacity) \

-                                  ? (stbds_arrgrow(a,n,0),0) : 0)

-#define stbds_arrgrow(a,b,c)   ((a) = stbds_arrgrowf_wrapper((a), sizeof *(a), (b), (c)))

-#define stbds_hmput(t, k, v) \

-    ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) STBDS_ADDRESSOF((t)->key, (k)), sizeof (t)->key, 0),   \

-     (t)[stbds_temp((t)-1)].key = (k),    \

-     (t)[stbds_temp((t)-1)].value = (v))

-#define stbds_hmputs(t, s) \

-    ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), &(s).key, sizeof (s).key, STBDS_HM_BINARY), \

-     (t)[stbds_temp((t)-1)] = (s))

-#define stbds_hmgeti(t,k) \

-    ((t) = stbds_hmget_key_wrapper((t), sizeof *(t), (void*) STBDS_ADDRESSOF((t)->key, (k)), sizeof (t)->key, STBDS_HM_BINARY), \

-      stbds_temp((t)-1))

-#define stbds_hmgeti_ts(t,k,temp) \

-    ((t) = stbds_hmget_key_ts_wrapper((t), sizeof *(t), (void*) STBDS_ADDRESSOF((t)->key, (k)), sizeof (t)->key, &(temp), STBDS_HM_BINARY), \

-      (temp))

-#define stbds_hmgetp(t, k) \

-    ((void) stbds_hmgeti(t,k), &(t)[stbds_temp((t)-1)])

-#define stbds_hmgetp_ts(t, k, temp) \

-    ((void) stbds_hmgeti_ts(t,k,temp), &(t)[temp])

-#define stbds_hmdel(t,k) \

-    (((t) = stbds_hmdel_key_wrapper((t),sizeof *(t), (void*) STBDS_ADDRESSOF((t)->key, (k)), sizeof (t)->key, STBDS_OFFSETOF((t),key), STBDS_HM_BINARY)),(t)?stbds_temp((t)-1):0)

-#define stbds_hmdefault(t, v) \

-    ((t) = stbds_hmput_default_wrapper((t), sizeof *(t)), (t)[-1].value = (v))

-#define stbds_hmdefaults(t, s) \

-    ((t) = stbds_hmput_default_wrapper((t), sizeof *(t)), (t)[-1] = (s))

-#define stbds_hmfree(p)        \

-    ((void) ((p) != NULL ? stbds_hmfree_func((p)-1,sizeof*(p)),0 : 0),(p)=NULL)

-#define stbds_hmgets(t, k)    (*stbds_hmgetp(t,k))

-#define stbds_hmget(t, k)     (stbds_hmgetp(t,k)->value)

-#define stbds_hmget_ts(t, k, temp)  (stbds_hmgetp_ts(t,k,temp)->value)

-#define stbds_hmlen(t)        ((t) ? (ptrdiff_t) stbds_header((t)-1)->length-1 : 0)

-#define stbds_hmlenu(t)       ((t) ?             stbds_header((t)-1)->length-1 : 0)

-#define stbds_hmgetp_null(t,k)  (stbds_hmgeti(t,k) == -1 ? NULL : &(t)[stbds_temp(t)-1])

-#define stbds_shput(t, k, v) \

-    ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_HM_STRING),   \

-     (t)[stbds_temp((t)-1)].value = (v))

-#define stbds_shputi(t, k, v) \

-    ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_HM_STRING),   \

-     (t)[stbds_temp((t)-1)].value = (v), stbds_temp((t)-1))

-#define stbds_shputs(t, s) \

-    ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (s).key, sizeof (s).key, STBDS_HM_STRING), \

-     (t)[stbds_temp((t)-1)] = (s), \

-     (t)[stbds_temp((t)-1)].key = stbds_temp_key((t)-1)) // above line overwrites whole structure, so must rewrite key here if it was allocated internally

-#define stbds_pshput(t, p) \

-    ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (p)->key, sizeof (p)->key, STBDS_HM_PTR_TO_STRING), \

-     (t)[stbds_temp((t)-1)] = (p))

-#define stbds_shgeti(t,k) \

-     ((t) = stbds_hmget_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_HM_STRING), \

-      stbds_temp((t)-1))

-#define stbds_pshgeti(t,k) \

-     ((t) = stbds_hmget_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (*(t))->key, STBDS_HM_PTR_TO_STRING), \

-      stbds_temp((t)-1))

-#define stbds_shgetp(t, k) \

-    ((void) stbds_shgeti(t,k), &(t)[stbds_temp((t)-1)])

-#define stbds_pshget(t, k) \

-    ((void) stbds_pshgeti(t,k), (t)[stbds_temp((t)-1)])

-#define stbds_shdel(t,k) \

-    (((t) = stbds_hmdel_key_wrapper((t),sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_OFFSETOF((t),key), STBDS_HM_STRING)),(t)?stbds_temp((t)-1):0)

-#define stbds_pshdel(t,k) \

-    (((t) = stbds_hmdel_key_wrapper((t),sizeof *(t), (void*) (k), sizeof (*(t))->key, STBDS_OFFSETOF(*(t),key), STBDS_HM_PTR_TO_STRING)),(t)?stbds_temp((t)-1):0)

-#define stbds_sh_new_arena(t)  \

-    ((t) = stbds_shmode_func_wrapper(t, sizeof *(t), STBDS_SH_ARENA))

-#define stbds_sh_new_strdup(t) \

-    ((t) = stbds_shmode_func_wrapper(t, sizeof *(t), STBDS_SH_STRDUP))

-#define stbds_shdefault(t, v)  stbds_hmdefault(t,v)

-#define stbds_shdefaults(t, s) stbds_hmdefaults(t,s)

-#define stbds_shfree       stbds_hmfree

-#define stbds_shlenu       stbds_hmlenu

-#define stbds_shgets(t, k) (*stbds_shgetp(t,k))

-#define stbds_shget(t, k)  (stbds_shgetp(t,k)->value)

-#define stbds_shgetp_null(t,k)  (stbds_shgeti(t,k) == -1 ? NULL : &(t)[stbds_temp(t)-1])

-#define stbds_shlen        stbds_hmlen

-typedef struct

-{

-  size_t      length;

-  size_t      capacity;

-  void      * hash_table;

-  ptrdiff_t   temp;

-} stbds_array_header;

-typedef struct stbds_string_block

-{

-  struct stbds_string_block *next;

-  char storage[8];

-} stbds_string_block;

-struct stbds_string_arena

-{

-  stbds_string_block *storage;

-  size_t remaining;

-  unsigned char block;

-  unsigned char mode;  // this isn't used by the string arena itself

-};

-#define STBDS_HM_BINARY         0

-#define STBDS_HM_STRING         1

-enum

-{

-   STBDS_SH_NONE,

-   STBDS_SH_DEFAULT,

-   STBDS_SH_STRDUP,

-   STBDS_SH_ARENA

-};

-#ifdef __cplusplus

-// in C we use implicit assignment from these void*-returning functions to T*.

-// in C++ these templates make the same code work

-template<class T> static T * stbds_arrgrowf_wrapper(T *a, size_t elemsize, size_t addlen, size_t min_cap) {

-  return (T*)stbds_arrgrowf((void *)a, elemsize, addlen, min_cap);

-}

-template<class T> static T * stbds_hmget_key_wrapper(T *a, size_t elemsize, void *key, size_t keysize, int mode) {

-  return (T*)stbds_hmget_key((void*)a, elemsize, key, keysize, mode);

-}

-template<class T> static T * stbds_hmget_key_ts_wrapper(T *a, size_t elemsize, void *key, size_t keysize, ptrdiff_t *temp, int mode) {

-  return (T*)stbds_hmget_key_ts((void*)a, elemsize, key, keysize, temp, mode);

-}

-template<class T> static T * stbds_hmput_default_wrapper(T *a, size_t elemsize) {

-  return (T*)stbds_hmput_default((void *)a, elemsize);

-}

-template<class T> static T * stbds_hmput_key_wrapper(T *a, size_t elemsize, void *key, size_t keysize, int mode) {

-  return (T*)stbds_hmput_key((void*)a, elemsize, key, keysize, mode);

-}

-template<class T> static T * stbds_hmdel_key_wrapper(T *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode){

-  return (T*)stbds_hmdel_key((void*)a, elemsize, key, keysize, keyoffset, mode);

-}

-template<class T> static T * stbds_shmode_func_wrapper(T *, size_t elemsize, int mode) {

-  return (T*)stbds_shmode_func(elemsize, mode);

-}

-#else

-#define stbds_arrgrowf_wrapper            stbds_arrgrowf

-#define stbds_hmget_key_wrapper           stbds_hmget_key

-#define stbds_hmget_key_ts_wrapper        stbds_hmget_key_ts

-#define stbds_hmput_default_wrapper       stbds_hmput_default

-#define stbds_hmput_key_wrapper           stbds_hmput_key

-#define stbds_hmdel_key_wrapper           stbds_hmdel_key

-#define stbds_shmode_func_wrapper(t,e,m)  stbds_shmode_func(e,m)

-#endif

-#endif // INCLUDE_STB_DS_H

-//////////////////////////////////////////////////////////////////////////////

-//

-//   IMPLEMENTATION

-//

-#ifdef STB_DS_IMPLEMENTATION

-#include <assert.h>

-#include <string.h>

-#ifndef STBDS_ASSERT

-#define STBDS_ASSERT_WAS_UNDEFINED

-#define STBDS_ASSERT(x)   ((void) 0)

-#endif

-#ifdef STBDS_STATISTICS

-#define STBDS_STATS(x)   x

-size_t stbds_array_grow;

-size_t stbds_hash_grow;

-size_t stbds_hash_shrink;

-size_t stbds_hash_rebuild;

-size_t stbds_hash_probes;

-size_t stbds_hash_alloc;

-size_t stbds_rehash_probes;

-size_t stbds_rehash_items;

-#else

-#define STBDS_STATS(x)

-#endif

-//

-// stbds_arr implementation

-//

-//int *prev_allocs[65536];

-//int num_prev;

-void *stbds_arrgrowf(void *a, size_t elemsize, size_t addlen, size_t min_cap)

-{

-  void *b;

-  size_t min_len = stbds_arrlen(a) + addlen;

-  // compute the minimum capacity needed

-  if (min_len > min_cap)

-    min_cap = min_len;

-  if (min_cap <= stbds_arrcap(a))

-    return a;

-  // increase needed capacity to guarantee O(1) amortized

-  if (min_cap < 2 * stbds_arrcap(a))

-    min_cap = 2 * stbds_arrcap(a);

-  else if (min_cap < 4)

-    min_cap = 4;

-  //if (num_prev < 65536) if (a) prev_allocs[num_prev++] = (int *) ((char *) a+1);

-  //if (num_prev == 2201)

-  //  num_prev = num_prev;

-  b = STBDS_REALLOC(NULL, (a) ? stbds_header(a) : 0, elemsize * min_cap + sizeof(stbds_array_header));

-  //if (num_prev < 65536) prev_allocs[num_prev++] = (int *) (char *) b;

-  b = (char *) b + sizeof(stbds_array_header);

-  if (a == NULL) {

-    stbds_header(b)->length = 0;

-    stbds_header(b)->hash_table = 0;

-  } else {

-    STBDS_STATS(++stbds_array_grow);

-  }

-  stbds_header(b)->capacity = min_cap;

-  return b;

-}

-//

-// stbds_hm hash table implementation

-//

-#ifdef STBDS_INTERNAL_SMALL_BUCKET

-#define STBDS_BUCKET_LENGTH      4

-#else

-#define STBDS_BUCKET_LENGTH      8

-#endif

-#define STBDS_BUCKET_SHIFT      (STBDS_BUCKET_LENGTH == 8 ? 3 : 2)

-#define STBDS_BUCKET_MASK       (STBDS_BUCKET_LENGTH-1)

-#define STBDS_CACHE_LINE_SIZE   64

-#define STBDS_ALIGN_FWD(n,a)   (((n) + (a) - 1) & ~((a)-1))

-typedef struct

-{

-   size_t    hash [STBDS_BUCKET_LENGTH];

-   ptrdiff_t index[STBDS_BUCKET_LENGTH];

-} stbds_hash_bucket; // in 32-bit, this is one 64-byte cache line; in 64-bit, each array is one 64-byte cache line

-typedef struct

-{

-  char * temp_key; // this MUST be the first field of the hash table

-  size_t slot_count;

-  size_t used_count;

-  size_t used_count_threshold;

-  size_t used_count_shrink_threshold;

-  size_t tombstone_count;

-  size_t tombstone_count_threshold;

-  size_t seed;

-  size_t slot_count_log2;

-  stbds_string_arena string;

-  stbds_hash_bucket *storage; // not a separate allocation, just 64-byte aligned storage after this struct

-} stbds_hash_index;

-#define STBDS_INDEX_EMPTY    -1

-#define STBDS_INDEX_DELETED  -2

-#define STBDS_INDEX_IN_USE(x)  ((x) >= 0)

-#define STBDS_HASH_EMPTY      0

-#define STBDS_HASH_DELETED    1

-static size_t stbds_hash_seed=0x31415926;

-void stbds_rand_seed(size_t seed)

-{

-  stbds_hash_seed = seed;

-}

-#define stbds_load_32_or_64(var, temp, v32, v64_hi, v64_lo)                                          \

-  temp = v64_lo ^ v32, temp <<= 16, temp <<= 16, temp >>= 16, temp >>= 16, /* discard if 32-bit */   \

-  var = v64_hi, var <<= 16, var <<= 16,                                    /* discard if 32-bit */   \

-  var ^= temp ^ v32

-#define STBDS_SIZE_T_BITS           ((sizeof (size_t)) * 8)

-static size_t stbds_probe_position(size_t hash, size_t slot_count, size_t slot_log2)

-{

-  size_t pos;

-  STBDS_NOTUSED(slot_log2);

-  pos = hash & (slot_count-1);

-  #ifdef STBDS_INTERNAL_BUCKET_START

-  pos &= ~STBDS_BUCKET_MASK;

-  #endif

-  return pos;

-}

-static size_t stbds_log2(size_t slot_count)

-{

-  size_t n=0;

-  while (slot_count > 1) {

-    slot_count >>= 1;

-    ++n;

-  }

-  return n;

-}

-static stbds_hash_index *stbds_make_hash_index(size_t slot_count, stbds_hash_index *ot)

-{

-  stbds_hash_index *t;

-  t = (stbds_hash_index *) STBDS_REALLOC(NULL,0,(slot_count >> STBDS_BUCKET_SHIFT) * sizeof(stbds_hash_bucket) + sizeof(stbds_hash_index) + STBDS_CACHE_LINE_SIZE-1);

-  t->storage = (stbds_hash_bucket *) STBDS_ALIGN_FWD((size_t) (t+1), STBDS_CACHE_LINE_SIZE);

-  t->slot_count = slot_count;

-  t->slot_count_log2 = stbds_log2(slot_count);

-  t->tombstone_count = 0;

-  t->used_count = 0;

-  #if 0 // A1

-  t->used_count_threshold        = slot_count*12/16; // if 12/16th of table is occupied, grow

-  t->tombstone_count_threshold   = slot_count* 2/16; // if tombstones are 2/16th of table, rebuild

-  t->used_count_shrink_threshold = slot_count* 4/16; // if table is only 4/16th full, shrink

-  #elif 1 // A2

-  //t->used_count_threshold        = slot_count*12/16; // if 12/16th of table is occupied, grow

-  //t->tombstone_count_threshold   = slot_count* 3/16; // if tombstones are 3/16th of table, rebuild

-  //t->used_count_shrink_threshold = slot_count* 4/16; // if table is only 4/16th full, shrink

-  // compute without overflowing

-  t->used_count_threshold        = slot_count - (slot_count>>2);

-  t->tombstone_count_threshold   = (slot_count>>3) + (slot_count>>4);

-  t->used_count_shrink_threshold = slot_count >> 2;

-  #elif 0 // B1

-  t->used_count_threshold        = slot_count*13/16; // if 13/16th of table is occupied, grow

-  t->tombstone_count_threshold   = slot_count* 2/16; // if tombstones are 2/16th of table, rebuild

-  t->used_count_shrink_threshold = slot_count* 5/16; // if table is only 5/16th full, shrink

-  #else // C1

-  t->used_count_threshold        = slot_count*14/16; // if 14/16th of table is occupied, grow

-  t->tombstone_count_threshold   = slot_count* 2/16; // if tombstones are 2/16th of table, rebuild

-  t->used_count_shrink_threshold = slot_count* 6/16; // if table is only 6/16th full, shrink

-  #endif

-  // Following statistics were measured on a Core i7-6700 @ 4.00Ghz, compiled with clang 7.0.1 -O2

-    // Note that the larger tables have high variance as they were run fewer times

-  //     A1            A2          B1           C1

-  //    0.10ms :     0.10ms :     0.10ms :     0.11ms :      2,000 inserts creating 2K table

-  //    0.96ms :     0.95ms :     0.97ms :     1.04ms :     20,000 inserts creating 20K table

-  //   14.48ms :    14.46ms :    10.63ms :    11.00ms :    200,000 inserts creating 200K table

-  //  195.74ms :   196.35ms :   203.69ms :   214.92ms :  2,000,000 inserts creating 2M table

-  // 2193.88ms :  2209.22ms :  2285.54ms :  2437.17ms : 20,000,000 inserts creating 20M table

-  //   65.27ms :    53.77ms :    65.33ms :    65.47ms : 500,000 inserts & deletes in 2K table

-  //   72.78ms :    62.45ms :    71.95ms :    72.85ms : 500,000 inserts & deletes in 20K table

-  //   89.47ms :    77.72ms :    96.49ms :    96.75ms : 500,000 inserts & deletes in 200K table

-  //   97.58ms :    98.14ms :    97.18ms :    97.53ms : 500,000 inserts & deletes in 2M table

-  //  118.61ms :   119.62ms :   120.16ms :   118.86ms : 500,000 inserts & deletes in 20M table

-  //  192.11ms :   194.39ms :   196.38ms :   195.73ms : 500,000 inserts & deletes in 200M table

-  if (slot_count <= STBDS_BUCKET_LENGTH)

-    t->used_count_shrink_threshold = 0;

-  // to avoid infinite loop, we need to guarantee that at least one slot is empty and will terminate probes

-  STBDS_ASSERT(t->used_count_threshold + t->tombstone_count_threshold < t->slot_count);

-  STBDS_STATS(++stbds_hash_alloc);

-  if (ot) {

-    t->string = ot->string;

-    // reuse old seed so we can reuse old hashes so below "copy out old data" doesn't do any hashing

-    t->seed = ot->seed;

-  } else {

-    size_t a,b,temp;

-    memset(&t->string, 0, sizeof(t->string));

-    t->seed = stbds_hash_seed;

-    // LCG

-    // in 32-bit, a =          2147001325   b =  715136305

-    // in 64-bit, a = 2862933555777941757   b = 3037000493

-    stbds_load_32_or_64(a,temp, 2147001325, 0x27bb2ee6, 0x87b0b0fd);

-    stbds_load_32_or_64(b,temp,  715136305,          0, 0xb504f32d);

-    stbds_hash_seed = stbds_hash_seed  * a + b;

-  }

-  {

-    size_t i,j;

-    for (i=0; i < slot_count >> STBDS_BUCKET_SHIFT; ++i) {

-      stbds_hash_bucket *b = &t->storage[i];

-      for (j=0; j < STBDS_BUCKET_LENGTH; ++j)

-        b->hash[j] = STBDS_HASH_EMPTY;

-      for (j=0; j < STBDS_BUCKET_LENGTH; ++j)

-        b->index[j] = STBDS_INDEX_EMPTY;

-    }

-  }

-  // copy out the old data, if any

-  if (ot) {

-    size_t i,j;

-    t->used_count = ot->used_count;

-    for (i=0; i < ot->slot_count >> STBDS_BUCKET_SHIFT; ++i) {

-      stbds_hash_bucket *ob = &ot->storage[i];

-      for (j=0; j < STBDS_BUCKET_LENGTH; ++j) {

-        if (STBDS_INDEX_IN_USE(ob->index[j])) {

-          size_t hash = ob->hash[j];

-          size_t pos = stbds_probe_position(hash, t->slot_count, t->slot_count_log2);

-          size_t step = STBDS_BUCKET_LENGTH;

-          STBDS_STATS(++stbds_rehash_items);

-          for (;;) {

-            size_t limit,z;

-            stbds_hash_bucket *bucket;

-            bucket = &t->storage[pos >> STBDS_BUCKET_SHIFT];

-            STBDS_STATS(++stbds_rehash_probes);

-            for (z=pos & STBDS_BUCKET_MASK; z < STBDS_BUCKET_LENGTH; ++z) {

-              if (bucket->hash[z] == 0) {

-                bucket->hash[z] = hash;

-                bucket->index[z] = ob->index[j];

-                goto done;

-              }

-            }

-            limit = pos & STBDS_BUCKET_MASK;

-            for (z = 0; z < limit; ++z) {

-              if (bucket->hash[z] == 0) {

-                bucket->hash[z] = hash;

-                bucket->index[z] = ob->index[j];

-                goto done;

-              }

-            }

-            pos += step;                  // quadratic probing

-            step += STBDS_BUCKET_LENGTH;

-            pos &= (t->slot_count-1);

-          }

-        }

-       done:

-        ;

-      }

-    }

-  }

-  return t;

-}

-#define STBDS_ROTATE_LEFT(val, n)   (((val) << (n)) | ((val) >> (STBDS_SIZE_T_BITS - (n))))

-#define STBDS_ROTATE_RIGHT(val, n)  (((val) >> (n)) | ((val) << (STBDS_SIZE_T_BITS - (n))))

-size_t stbds_hash_string(char *str, size_t seed)

-{

-  size_t hash = seed;

-  while (*str)

-     hash = STBDS_ROTATE_LEFT(hash, 9) + (unsigned char) *str++;

-  // Thomas Wang 64-to-32 bit mix function, hopefully also works in 32 bits

-  hash ^= seed;

-  hash = (~hash) + (hash << 18);

-  hash ^= hash ^ STBDS_ROTATE_RIGHT(hash,31);

-  hash = hash * 21;

-  hash ^= hash ^ STBDS_ROTATE_RIGHT(hash,11);

-  hash += (hash << 6);

-  hash ^= STBDS_ROTATE_RIGHT(hash,22);

-  return hash+seed;

-}

-#ifdef STBDS_SIPHASH_2_4

-#define STBDS_SIPHASH_C_ROUNDS 2

-#define STBDS_SIPHASH_D_ROUNDS 4

-typedef int STBDS_SIPHASH_2_4_can_only_be_used_in_64_bit_builds[sizeof(size_t) == 8 ? 1 : -1];

-#endif

-#ifndef STBDS_SIPHASH_C_ROUNDS

-#define STBDS_SIPHASH_C_ROUNDS 1

-#endif

-#ifndef STBDS_SIPHASH_D_ROUNDS

-#define STBDS_SIPHASH_D_ROUNDS 1

-#endif

-#ifdef _MSC_VER

-#pragma warning(push)

-#pragma warning(disable:4127) // conditional expression is constant, for do..while(0) and sizeof()==

-#endif

-static size_t stbds_siphash_bytes(void *p, size_t len, size_t seed)

-{

-  unsigned char *d = (unsigned char *) p;

-  size_t i,j;

-  size_t v0,v1,v2,v3, data;

-  // hash that works on 32- or 64-bit registers without knowing which we have

-  // (computes different results on 32-bit and 64-bit platform)

-  // derived from siphash, but on 32-bit platforms very different as it uses 4 32-bit state not 4 64-bit

-  v0 = ((((size_t) 0x736f6d65 << 16) << 16) + 0x70736575) ^  seed;

-  v1 = ((((size_t) 0x646f7261 << 16) << 16) + 0x6e646f6d) ^ ~seed;

-  v2 = ((((size_t) 0x6c796765 << 16) << 16) + 0x6e657261) ^  seed;

-  v3 = ((((size_t) 0x74656462 << 16) << 16) + 0x79746573) ^ ~seed;

-  #ifdef STBDS_TEST_SIPHASH_2_4

-  // hardcoded with key material in the siphash test vectors

-  v0 ^= 0x0706050403020100ull ^  seed;

-  v1 ^= 0x0f0e0d0c0b0a0908ull ^ ~seed;

-  v2 ^= 0x0706050403020100ull ^  seed;

-  v3 ^= 0x0f0e0d0c0b0a0908ull ^ ~seed;

-  #endif

-  #define STBDS_SIPROUND() \

-    do {                   \

-      v0 += v1; v1 = STBDS_ROTATE_LEFT(v1, 13);  v1 ^= v0; v0 = STBDS_ROTATE_LEFT(v0,STBDS_SIZE_T_BITS/2); \

-      v2 += v3; v3 = STBDS_ROTATE_LEFT(v3, 16);  v3 ^= v2;                                                 \

-      v2 += v1; v1 = STBDS_ROTATE_LEFT(v1, 17);  v1 ^= v2; v2 = STBDS_ROTATE_LEFT(v2,STBDS_SIZE_T_BITS/2); \

-      v0 += v3; v3 = STBDS_ROTATE_LEFT(v3, 21);  v3 ^= v0;                                                 \

-    } while (0)

-  for (i=0; i+sizeof(size_t) <= len; i += sizeof(size_t), d += sizeof(size_t)) {

-    data = d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24);

-    data |= (size_t) (d[4] | (d[5] << 8) | (d[6] << 16) | (d[7] << 24)) << 16 << 16; // discarded if size_t == 4

-    v3 ^= data;

-    for (j=0; j < STBDS_SIPHASH_C_ROUNDS; ++j)

-      STBDS_SIPROUND();

-    v0 ^= data;

-  }

-  data = len << (STBDS_SIZE_T_BITS-8);

-  switch (len - i) {

-    case 7: data |= ((size_t) d[6] << 24) << 24; // fall through

-    case 6: data |= ((size_t) d[5] << 20) << 20; // fall through

-    case 5: data |= ((size_t) d[4] << 16) << 16; // fall through

-    case 4: data |= (d[3] << 24); // fall through

-    case 3: data |= (d[2] << 16); // fall through

-    case 2: data |= (d[1] << 8); // fall through

-    case 1: data |= d[0]; // fall through

-    case 0: break;

-  }

-  v3 ^= data;

-  for (j=0; j < STBDS_SIPHASH_C_ROUNDS; ++j)

-    STBDS_SIPROUND();

-  v0 ^= data;

-  v2 ^= 0xff;

-  for (j=0; j < STBDS_SIPHASH_D_ROUNDS; ++j)

-    STBDS_SIPROUND();

-#ifdef STBDS_SIPHASH_2_4

-  return v0^v1^v2^v3;

-#else

-  return v1^v2^v3; // slightly stronger since v0^v3 in above cancels out final round operation? I tweeted at the authors of SipHash about this but they didn't reply

-#endif

-}

-size_t stbds_hash_bytes(void *p, size_t len, size_t seed)

-{

-#ifdef STBDS_SIPHASH_2_4

-  return stbds_siphash_bytes(p,len,seed);

-#else

-  unsigned char *d = (unsigned char *) p;

-  if (len == 4) {

-    unsigned int hash = d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24);

-    #if 0

-    // HASH32-A  Bob Jenkin's hash function w/o large constants

-    hash ^= seed;

-    hash -= (hash<<6);

-    hash ^= (hash>>17);

-    hash -= (hash<<9);

-    hash ^= seed;

-    hash ^= (hash<<4);

-    hash -= (hash<<3);

-    hash ^= (hash<<10);

-    hash ^= (hash>>15);

-    #elif 1

-    // HASH32-BB  Bob Jenkin's presumably-accidental version of Thomas Wang hash with rotates turned into shifts.

-    // Note that converting these back to rotates makes it run a lot slower, presumably due to collisions, so I'm

-    // not really sure what's going on.

-    hash ^= seed;

-    hash = (hash ^ 61) ^ (hash >> 16);

-    hash = hash + (hash << 3);

-    hash = hash ^ (hash >> 4);

-    hash = hash * 0x27d4eb2d;

-    hash ^= seed;

-    hash = hash ^ (hash >> 15);

-    #else  // HASH32-C   -  Murmur3

-    hash ^= seed;

-    hash *= 0xcc9e2d51;

-    hash = (hash << 17) | (hash >> 15);

-    hash *= 0x1b873593;

-    hash ^= seed;

-    hash = (hash << 19) | (hash >> 13);

-    hash = hash*5 + 0xe6546b64;

-    hash ^= hash >> 16;

-    hash *= 0x85ebca6b;

-    hash ^= seed;

-    hash ^= hash >> 13;

-    hash *= 0xc2b2ae35;

-    hash ^= hash >> 16;

-    #endif

-    // Following statistics were measured on a Core i7-6700 @ 4.00Ghz, compiled with clang 7.0.1 -O2

-    // Note that the larger tables have high variance as they were run fewer times

-    //  HASH32-A   //  HASH32-BB  //  HASH32-C

-    //    0.10ms   //    0.10ms   //    0.10ms :      2,000 inserts creating 2K table

-    //    0.96ms   //    0.95ms   //    0.99ms :     20,000 inserts creating 20K table

-    //   14.69ms   //   14.43ms   //   14.97ms :    200,000 inserts creating 200K table

-    //  199.99ms   //  195.36ms   //  202.05ms :  2,000,000 inserts creating 2M table

-    // 2234.84ms   // 2187.74ms   // 2240.38ms : 20,000,000 inserts creating 20M table

-    //   55.68ms   //   53.72ms   //   57.31ms : 500,000 inserts & deletes in 2K table

-    //   63.43ms   //   61.99ms   //   65.73ms : 500,000 inserts & deletes in 20K table

-    //   80.04ms   //   77.96ms   //   81.83ms : 500,000 inserts & deletes in 200K table

-    //  100.42ms   //   97.40ms   //  102.39ms : 500,000 inserts & deletes in 2M table

-    //  119.71ms   //  120.59ms   //  121.63ms : 500,000 inserts & deletes in 20M table

-    //  185.28ms   //  195.15ms   //  187.74ms : 500,000 inserts & deletes in 200M table

-    //   15.58ms   //   14.79ms   //   15.52ms : 200,000 inserts creating 200K table with varying key spacing

-    return (((size_t) hash << 16 << 16) | hash) ^ seed;

-  } else if (len == 8 && sizeof(size_t) == 8) {

-    size_t hash = d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24);

-    hash |= (size_t) (d[4] | (d[5] << 8) | (d[6] << 16) | (d[7] << 24)) << 16 << 16; // avoid warning if size_t == 4

-    hash ^= seed;

-    hash = (~hash) + (hash << 21);

-    hash ^= STBDS_ROTATE_RIGHT(hash,24);

-    hash *= 265;

-    hash ^= STBDS_ROTATE_RIGHT(hash,14);

-    hash ^= seed;

-    hash *= 21;

-    hash ^= STBDS_ROTATE_RIGHT(hash,28);

-    hash += (hash << 31);

-    hash = (~hash) + (hash << 18);

-    return hash;

-  } else {

-    return stbds_siphash_bytes(p,len,seed);

-  }

-#endif

-}

-#ifdef _MSC_VER

-#pragma warning(pop)

-#endif

-static int stbds_is_key_equal(void *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode, size_t i)

-{

-  if (mode >= STBDS_HM_STRING)

-    return 0==strcmp((char *) key, * (char **) ((char *) a + elemsize*i + keyoffset));

-  else

-    return 0==memcmp(key, (char *) a + elemsize*i + keyoffset, keysize);

-}

-#define STBDS_HASH_TO_ARR(x,elemsize) ((char*) (x) - (elemsize))

-#define STBDS_ARR_TO_HASH(x,elemsize) ((char*) (x) + (elemsize))

-#define stbds_hash_table(a)  ((stbds_hash_index *) stbds_header(a)->hash_table)

-void stbds_hmfree_func(void *a, size_t elemsize)

-{

-  if (a == NULL) return;

-  if (stbds_hash_table(a) != NULL) {

-    if (stbds_hash_table(a)->string.mode == STBDS_SH_STRDUP) {

-      size_t i;

-      // skip 0th element, which is default

-      for (i=1; i < stbds_header(a)->length; ++i)

-        STBDS_FREE(NULL, *(char**) ((char *) a + elemsize*i));

-    }

-    stbds_strreset(&stbds_hash_table(a)->string);

-  }

-  STBDS_FREE(NULL, stbds_header(a)->hash_table);

-  STBDS_FREE(NULL, stbds_header(a));

-}

-static ptrdiff_t stbds_hm_find_slot(void *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode)

-{

-  void *raw_a = STBDS_HASH_TO_ARR(a,elemsize);

-  stbds_hash_index *table = stbds_hash_table(raw_a);

-  size_t hash = mode >= STBDS_HM_STRING ? stbds_hash_string((char*)key,table->seed) : stbds_hash_bytes(key, keysize,table->seed);

-  size_t step = STBDS_BUCKET_LENGTH;

-  size_t limit,i;

-  size_t pos;

-  stbds_hash_bucket *bucket;

-  if (hash < 2) hash += 2; // stored hash values are forbidden from being 0, so we can detect empty slots

-  pos = stbds_probe_position(hash, table->slot_count, table->slot_count_log2);

-  for (;;) {

-    STBDS_STATS(++stbds_hash_probes);

-    bucket = &table->storage[pos >> STBDS_BUCKET_SHIFT];

-    // start searching from pos to end of bucket, this should help performance on small hash tables that fit in cache

-    for (i=pos & STBDS_BUCKET_MASK; i < STBDS_BUCKET_LENGTH; ++i) {

-      if (bucket->hash[i] == hash) {

-        if (stbds_is_key_equal(a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) {

-          return (pos & ~STBDS_BUCKET_MASK)+i;

-        }

-      } else if (bucket->hash[i] == STBDS_HASH_EMPTY) {

-        return -1;

-      }

-    }

-    // search from beginning of bucket to pos

-    limit = pos & STBDS_BUCKET_MASK;

-    for (i = 0; i < limit; ++i) {

-      if (bucket->hash[i] == hash) {

-        if (stbds_is_key_equal(a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) {

-          return (pos & ~STBDS_BUCKET_MASK)+i;

-        }

-      } else if (bucket->hash[i] == STBDS_HASH_EMPTY) {

-        return -1;

-      }

-    }

-    // quadratic probing

-    pos += step;

-    step += STBDS_BUCKET_LENGTH;

-    pos &= (table->slot_count-1);

-  }

-  /* NOTREACHED */

-}

-void * stbds_hmget_key_ts(void *a, size_t elemsize, void *key, size_t keysize, ptrdiff_t *temp, int mode)

-{

-  size_t keyoffset = 0;

-  if (a == NULL) {

-    // make it non-empty so we can return a temp

-    a = stbds_arrgrowf(0, elemsize, 0, 1);

-    stbds_header(a)->length += 1;

-    memset(a, 0, elemsize);

-    *temp = STBDS_INDEX_EMPTY;

-    // adjust a to point after the default element

-    return STBDS_ARR_TO_HASH(a,elemsize);

-  } else {

-    stbds_hash_index *table;

-    void *raw_a = STBDS_HASH_TO_ARR(a,elemsize);

-    // adjust a to point to the default element

-    table = (stbds_hash_index *) stbds_header(raw_a)->hash_table;

-    if (table == 0) {

-      *temp = -1;

-    } else {

-      ptrdiff_t slot = stbds_hm_find_slot(a, elemsize, key, keysize, keyoffset, mode);

-      if (slot < 0) {

-        *temp = STBDS_INDEX_EMPTY;

-      } else {

-        stbds_hash_bucket *b = &table->storage[slot >> STBDS_BUCKET_SHIFT];

-        *temp = b->index[slot & STBDS_BUCKET_MASK];

-      }

-    }

-    return a;

-  }

-}

-void * stbds_hmget_key(void *a, size_t elemsize, void *key, size_t keysize, int mode)

-{

-  ptrdiff_t temp;

-  void *p = stbds_hmget_key_ts(a, elemsize, key, keysize, &temp, mode);

-  stbds_temp(STBDS_HASH_TO_ARR(p,elemsize)) = temp;

-  return p;

-}

-void * stbds_hmput_default(void *a, size_t elemsize)

-{

-  // three cases:

-  //   a is NULL <- allocate

-  //   a has a hash table but no entries, because of shmode <- grow

-  //   a has entries <- do nothing

-  if (a == NULL || stbds_header(STBDS_HASH_TO_ARR(a,elemsize))->length == 0) {

-    a = stbds_arrgrowf(a ? STBDS_HASH_TO_ARR(a,elemsize) : NULL, elemsize, 0, 1);

-    stbds_header(a)->length += 1;

-    memset(a, 0, elemsize);

-    a=STBDS_ARR_TO_HASH(a,elemsize);

-  }

-  return a;

-}

-static char *stbds_strdup(char *str);

-void *stbds_hmput_key(void *a, size_t elemsize, void *key, size_t keysize, int mode)

-{

-  size_t keyoffset=0;

-  void *raw_a;

-  stbds_hash_index *table;

-  if (a == NULL) {

-    a = stbds_arrgrowf(0, elemsize, 0, 1);

-    memset(a, 0, elemsize);

-    stbds_header(a)->length += 1;

-    // adjust a to point AFTER the default element

-    a = STBDS_ARR_TO_HASH(a,elemsize);

-  }

-  // adjust a to point to the default element

-  raw_a = a;

-  a = STBDS_HASH_TO_ARR(a,elemsize);

-  table = (stbds_hash_index *) stbds_header(a)->hash_table;

-  if (table == NULL || table->used_count >= table->used_count_threshold) {

-    stbds_hash_index *nt;

-    size_t slot_count;

-    slot_count = (table == NULL) ? STBDS_BUCKET_LENGTH : table->slot_count*2;

-    nt = stbds_make_hash_index(slot_count, table);

-    if (table)

-      STBDS_FREE(NULL, table);

-    else

-      nt->string.mode = mode >= STBDS_HM_STRING ? STBDS_SH_DEFAULT : 0;

-    stbds_header(a)->hash_table = table = nt;

-    STBDS_STATS(++stbds_hash_grow);

-  }

-  // we iterate hash table explicitly because we want to track if we saw a tombstone

-  {

-    size_t hash = mode >= STBDS_HM_STRING ? stbds_hash_string((char*)key,table->seed) : stbds_hash_bytes(key, keysize,table->seed);

-    size_t step = STBDS_BUCKET_LENGTH;

-    size_t limit,i;

-    size_t pos;

-    ptrdiff_t tombstone = -1;

-    stbds_hash_bucket *bucket;

-    // stored hash values are forbidden from being 0, so we can detect empty slots to early out quickly

-    if (hash < 2) hash += 2;

-    pos = stbds_probe_position(hash, table->slot_count, table->slot_count_log2);

-    for (;;) {

-      STBDS_STATS(++stbds_hash_probes);

-      bucket = &table->storage[pos >> STBDS_BUCKET_SHIFT];

-      // start searching from pos to end of bucket

-      for (i=pos & STBDS_BUCKET_MASK; i < STBDS_BUCKET_LENGTH; ++i) {

-        if (bucket->hash[i] == hash) {

-          if (stbds_is_key_equal(raw_a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) {

-            stbds_temp(a) = bucket->index[i];

-            return STBDS_ARR_TO_HASH(a,elemsize);

-          }

-        } else if (bucket->hash[i] == 0) {

-          pos = (pos & ~STBDS_BUCKET_MASK) + i;

-          goto found_empty_slot;

-        } else if (tombstone < 0) {

-          if (bucket->index[i] == STBDS_INDEX_DELETED)

-            tombstone = (ptrdiff_t) ((pos & ~STBDS_BUCKET_MASK) + i);

-        }

-      }

-      // search from beginning of bucket to pos

-      limit = pos & STBDS_BUCKET_MASK;

-      for (i = 0; i < limit; ++i) {

-        if (bucket->hash[i] == hash) {

-          if (stbds_is_key_equal(raw_a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) {

-            stbds_temp(a) = bucket->index[i];

-            return STBDS_ARR_TO_HASH(a,elemsize);

-          }

-        } else if (bucket->hash[i] == 0) {

-          pos = (pos & ~STBDS_BUCKET_MASK) + i;

-          goto found_empty_slot;

-        } else if (tombstone < 0) {

-          if (bucket->index[i] == STBDS_INDEX_DELETED)

-            tombstone = (ptrdiff_t) ((pos & ~STBDS_BUCKET_MASK) + i);

-        }

-      }

-      // quadratic probing

-      pos += step;

-      step += STBDS_BUCKET_LENGTH;

-      pos &= (table->slot_count-1);

-    }

-   found_empty_slot:

-    if (tombstone >= 0) {

-      pos = tombstone;

-      --table->tombstone_count;

-    }

-    ++table->used_count;

-    {

-      ptrdiff_t i = (ptrdiff_t) stbds_arrlen(a);

-      // we want to do stbds_arraddn(1), but we can't use the macros since we don't have something of the right type

-      if ((size_t) i+1 > stbds_arrcap(a))

-        *(void **) &a = stbds_arrgrowf(a, elemsize, 1, 0);

-      raw_a = STBDS_ARR_TO_HASH(a,elemsize);

-      STBDS_ASSERT((size_t) i+1 <= stbds_arrcap(a));

-      stbds_header(a)->length = i+1;

-      bucket = &table->storage[pos >> STBDS_BUCKET_SHIFT];

-      bucket->hash[pos & STBDS_BUCKET_MASK] = hash;

-      bucket->index[pos & STBDS_BUCKET_MASK] = i-1;

-      stbds_temp(a) = i-1;

-      switch (table->string.mode) {

-         case STBDS_SH_STRDUP:  stbds_temp_key(a) = *(char **) ((char *) a + elemsize*i) = stbds_strdup((char*) key); break;

-         case STBDS_SH_ARENA:   stbds_temp_key(a) = *(char **) ((char *) a + elemsize*i) = stbds_stralloc(&table->string, (char*)key); break;

-         case STBDS_SH_DEFAULT: stbds_temp_key(a) = *(char **) ((char *) a + elemsize*i) = (char *) key; break;

-         default:                memcpy((char *) a + elemsize*i, key, keysize); break;

-      }

-    }

-    return STBDS_ARR_TO_HASH(a,elemsize);

-  }

-}

-void * stbds_shmode_func(size_t elemsize, int mode)

-{

-  void *a = stbds_arrgrowf(0, elemsize, 0, 1);

-  stbds_hash_index *h;

-  memset(a, 0, elemsize);

-  stbds_header(a)->length = 1;

-  stbds_header(a)->hash_table = h = (stbds_hash_index *) stbds_make_hash_index(STBDS_BUCKET_LENGTH, NULL);

-  h->string.mode = (unsigned char) mode;

-  return STBDS_ARR_TO_HASH(a,elemsize);

-}

-void * stbds_hmdel_key(void *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode)

-{

-  if (a == NULL) {

-    return 0;

-  } else {

-    stbds_hash_index *table;

-    void *raw_a = STBDS_HASH_TO_ARR(a,elemsize);

-    table = (stbds_hash_index *) stbds_header(raw_a)->hash_table;

-    stbds_temp(raw_a) = 0;

-    if (table == 0) {

-      return a;

-    } else {

-      ptrdiff_t slot;

-      slot = stbds_hm_find_slot(a, elemsize, key, keysize, keyoffset, mode);

-      if (slot < 0)

-        return a;

-      else {

-        stbds_hash_bucket *b = &table->storage[slot >> STBDS_BUCKET_SHIFT];

-        int i = slot & STBDS_BUCKET_MASK;

-        ptrdiff_t old_index = b->index[i];

-        ptrdiff_t final_index = (ptrdiff_t) stbds_arrlen(raw_a)-1-1; // minus one for the raw_a vs a, and minus one for 'last'

-        STBDS_ASSERT(slot < (ptrdiff_t) table->slot_count);

-        --table->used_count;

-        ++table->tombstone_count;

-        stbds_temp(raw_a) = 1;

-        STBDS_ASSERT(table->used_count >= 0);

-        //STBDS_ASSERT(table->tombstone_count < table->slot_count/4);

-        b->hash[i] = STBDS_HASH_DELETED;

-        b->index[i] = STBDS_INDEX_DELETED;

-        if (mode == STBDS_HM_STRING && table->string.mode == STBDS_SH_STRDUP)

-          STBDS_FREE(NULL, *(char**) ((char *) a+elemsize*old_index));

-        // if indices are the same, memcpy is a no-op, but back-pointer-fixup will fail, so skip

-        if (old_index != final_index) {

-          // swap delete

-          memmove((char*) a + elemsize*old_index, (char*) a + elemsize*final_index, elemsize);

-          // now find the slot for the last element

-          if (mode == STBDS_HM_STRING)

-            slot = stbds_hm_find_slot(a, elemsize, *(char**) ((char *) a+elemsize*old_index + keyoffset), keysize, keyoffset, mode);

-          else

-            slot = stbds_hm_find_slot(a, elemsize,  (char* ) a+elemsize*old_index + keyoffset, keysize, keyoffset, mode);

-          STBDS_ASSERT(slot >= 0);

-          b = &table->storage[slot >> STBDS_BUCKET_SHIFT];

-          i = slot & STBDS_BUCKET_MASK;

-          STBDS_ASSERT(b->index[i] == final_index);

-          b->index[i] = old_index;

-        }

-        stbds_header(raw_a)->length -= 1;

-        if (table->used_count < table->used_count_shrink_threshold && table->slot_count > STBDS_BUCKET_LENGTH) {

-          stbds_header(raw_a)->hash_table = stbds_make_hash_index(table->slot_count>>1, table);

-          STBDS_FREE(NULL, table);

-          STBDS_STATS(++stbds_hash_shrink);

-        } else if (table->tombstone_count > table->tombstone_count_threshold) {

-          stbds_header(raw_a)->hash_table = stbds_make_hash_index(table->slot_count   , table);

-          STBDS_FREE(NULL, table);

-          STBDS_STATS(++stbds_hash_rebuild);

-        }

-        return a;

-      }

-    }

-  }

-  /* NOTREACHED */

-}

-static char *stbds_strdup(char *str)

-{

-  // to keep replaceable allocator simple, we don't want to use strdup.

-  // rolling our own also avoids problem of strdup vs _strdup

-  size_t len = strlen(str)+1;

-  char *p = (char*) STBDS_REALLOC(NULL, 0, len);

-  memmove(p, str, len);

-  return p;

-}

-#ifndef STBDS_STRING_ARENA_BLOCKSIZE_MIN

-#define STBDS_STRING_ARENA_BLOCKSIZE_MIN  512u

-#endif

-#ifndef STBDS_STRING_ARENA_BLOCKSIZE_MAX

-#define STBDS_STRING_ARENA_BLOCKSIZE_MAX  (1u<<20)

-#endif

-char *stbds_stralloc(stbds_string_arena *a, char *str)

-{

-  char *p;

-  size_t len = strlen(str)+1;

-  if (len > a->remaining) {

-    // compute the next blocksize

-    size_t blocksize = a->block;

-    // size is 512, 512, 1024, 1024, 2048, 2048, 4096, 4096, etc., so that

-    // there are log(SIZE) allocations to free when we destroy the table

-    blocksize = (size_t) (STBDS_STRING_ARENA_BLOCKSIZE_MIN) << (blocksize>>1);

-    // if size is under 1M, advance to next blocktype

-    if (blocksize < (size_t)(STBDS_STRING_ARENA_BLOCKSIZE_MAX))

-      ++a->block;

-    if (len > blocksize) {

-      // if string is larger than blocksize, then just allocate the full size.

-      // note that we still advance string_block so block size will continue

-      // increasing, so e.g. if somebody only calls this with 1000-long strings,

-      // eventually the arena will start doubling and handling those as well

-      stbds_string_block *sb = (stbds_string_block *) STBDS_REALLOC(NULL, 0, sizeof(*sb)-8 + len);

-      memmove(sb->storage, str, len);

-      if (a->storage) {

-        // insert it after the first element, so that we don't waste the space there

-        sb->next = a->storage->next;

-        a->storage->next = sb;

-      } else {

-        sb->next = 0;

-        a->storage = sb;

-        a->remaining = 0; // this is redundant, but good for clarity

-      }

-      return sb->storage;

-    } else {

-      stbds_string_block *sb = (stbds_string_block *) STBDS_REALLOC(NULL, 0, sizeof(*sb)-8 + blocksize);

-      sb->next = a->storage;

-      a->storage = sb;

-      a->remaining = blocksize;

-    }

-  }

-  STBDS_ASSERT(len <= a->remaining);

-  p = a->storage->storage + a->remaining - len;

-  a->remaining -= len;

-  memmove(p, str, len);

-  return p;

-}

-void stbds_strreset(stbds_string_arena *a)

-{

-  stbds_string_block *x,*y;

-  x = a->storage;

-  while (x) {

-    y = x->next;

-    STBDS_FREE(NULL, x);

-    x = y;

-  }

-  memset(a, 0, sizeof(*a));

-}

-#endif

-//////////////////////////////////////////////////////////////////////////////

-//

-//   UNIT TESTS

-//

-#ifdef STBDS_UNIT_TESTS

-#include <stdio.h>

-#ifdef STBDS_ASSERT_WAS_UNDEFINED

-#undef STBDS_ASSERT

-#endif

-#ifndef STBDS_ASSERT

-#define STBDS_ASSERT assert

-#include <assert.h>

-#endif

-typedef struct { int key,b,c,d; } stbds_struct;

-typedef struct { int key[2],b,c,d; } stbds_struct2;

-static char buffer[256];

-char *strkey(int n)

-{

-#if defined(_WIN32) && defined(__STDC_WANT_SECURE_LIB__)

-   sprintf_s(buffer, sizeof(buffer), "test_%d", n);

-#else

-   sprintf(buffer, "test_%d", n);

-#endif

-   return buffer;

-}

-void stbds_unit_tests(void)

-{

-#if defined(_MSC_VER) && _MSC_VER <= 1200 && defined(__cplusplus)

-  // VC6 C++ doesn't like the template<> trick on unnamed structures, so do nothing!

-  STBDS_ASSERT(0);

-#else

-  const int testsize = 100000;

-  const int testsize2 = testsize/20;

-  int *arr=NULL;

-  struct { int   key;        int value; }  *intmap  = NULL;

-  struct { char *key;        int value; }  *strmap  = NULL, s;

-  struct { stbds_struct key; int value; }  *map     = NULL;

-  stbds_struct                             *map2    = NULL;

-  stbds_struct2                            *map3    = NULL;

-  stbds_string_arena                        sa      = { 0 };

-  int key3[2] = { 1,2 };

-  ptrdiff_t temp;

-  int i,j;

-  STBDS_ASSERT(arrlen(arr)==0);

-  for (i=0; i < 20000; i += 50) {

-    for (j=0; j < i; ++j)

-      arrpush(arr,j);

-    arrfree(arr);

-  }

-  for (i=0; i < 4; ++i) {

-    arrpush(arr,1); arrpush(arr,2); arrpush(arr,3); arrpush(arr,4);

-    arrdel(arr,i);

-    arrfree(arr);

-    arrpush(arr,1); arrpush(arr,2); arrpush(arr,3); arrpush(arr,4);

-    arrdelswap(arr,i);

-    arrfree(arr);

-  }

-  for (i=0; i < 5; ++i) {

-    arrpush(arr,1); arrpush(arr,2); arrpush(arr,3); arrpush(arr,4);

-    stbds_arrins(arr,i,5);

-    STBDS_ASSERT(arr[i] == 5);

-    if (i < 4)

-      STBDS_ASSERT(arr[4] == 4);

-    arrfree(arr);

-  }

-  i = 1;

-  STBDS_ASSERT(hmgeti(intmap,i) == -1);

-  hmdefault(intmap, -2);

-  STBDS_ASSERT(hmgeti(intmap, i) == -1);

-  STBDS_ASSERT(hmget (intmap, i) == -2);

-  for (i=0; i < testsize; i+=2)

-    hmput(intmap, i, i*5);

-  for (i=0; i < testsize; i+=1) {

-    if (i & 1) STBDS_ASSERT(hmget(intmap, i) == -2 );

-    else       STBDS_ASSERT(hmget(intmap, i) == i*5);

-    if (i & 1) STBDS_ASSERT(hmget_ts(intmap, i, temp) == -2 );

-    else       STBDS_ASSERT(hmget_ts(intmap, i, temp) == i*5);

-  }

-  for (i=0; i < testsize; i+=2)

-    hmput(intmap, i, i*3);

-  for (i=0; i < testsize; i+=1)

-    if (i & 1) STBDS_ASSERT(hmget(intmap, i) == -2 );

-    else       STBDS_ASSERT(hmget(intmap, i) == i*3);

-  for (i=2; i < testsize; i+=4)

-    hmdel(intmap, i); // delete half the entries

-  for (i=0; i < testsize; i+=1)

-    if (i & 3) STBDS_ASSERT(hmget(intmap, i) == -2 );

-    else       STBDS_ASSERT(hmget(intmap, i) == i*3);

-  for (i=0; i < testsize; i+=1)

-    hmdel(intmap, i); // delete the rest of the entries

-  for (i=0; i < testsize; i+=1)

-    STBDS_ASSERT(hmget(intmap, i) == -2 );

-  hmfree(intmap);

-  for (i=0; i < testsize; i+=2)

-    hmput(intmap, i, i*3);

-  hmfree(intmap);

-  #if defined(__clang__) || defined(__GNUC__)

-  #ifndef __cplusplus

-  intmap = NULL;

-  hmput(intmap, 15, 7);

-  hmput(intmap, 11, 3);

-  hmput(intmap,  9, 5);

-  STBDS_ASSERT(hmget(intmap, 9) == 5);

-  STBDS_ASSERT(hmget(intmap, 11) == 3);

-  STBDS_ASSERT(hmget(intmap, 15) == 7);

-  #endif

-  #endif

-  for (i=0; i < testsize; ++i)

-    stralloc(&sa, strkey(i));

-  strreset(&sa);

-  {

-    s.key = "a", s.value = 1;

-    shputs(strmap, s);

-    STBDS_ASSERT(*strmap[0].key == 'a');

-    STBDS_ASSERT(strmap[0].key == s.key);

-    STBDS_ASSERT(strmap[0].value == s.value);

-    shfree(strmap);

-  }

-  {

-    s.key = "a", s.value = 1;

-    sh_new_strdup(strmap);

-    shputs(strmap, s);

-    STBDS_ASSERT(*strmap[0].key == 'a');

-    STBDS_ASSERT(strmap[0].key != s.key);

-    STBDS_ASSERT(strmap[0].value == s.value);

-    shfree(strmap);

-  }

-  {

-    s.key = "a", s.value = 1;

-    sh_new_arena(strmap);

-    shputs(strmap, s);

-    STBDS_ASSERT(*strmap[0].key == 'a');

-    STBDS_ASSERT(strmap[0].key != s.key);

-    STBDS_ASSERT(strmap[0].value == s.value);

-    shfree(strmap);

-  }

-  for (j=0; j < 2; ++j) {

-    STBDS_ASSERT(shgeti(strmap,"foo") == -1);

-    if (j == 0)

-      sh_new_strdup(strmap);

-    else

-      sh_new_arena(strmap);

-    STBDS_ASSERT(shgeti(strmap,"foo") == -1);

-    shdefault(strmap, -2);

-    STBDS_ASSERT(shgeti(strmap,"foo") == -1);

-    for (i=0; i < testsize; i+=2)

-      shput(strmap, strkey(i), i*3);

-    for (i=0; i < testsize; i+=1)

-      if (i & 1) STBDS_ASSERT(shget(strmap, strkey(i)) == -2 );

-      else       STBDS_ASSERT(shget(strmap, strkey(i)) == i*3);

-    for (i=2; i < testsize; i+=4)

-      shdel(strmap, strkey(i)); // delete half the entries

-    for (i=0; i < testsize; i+=1)

-      if (i & 3) STBDS_ASSERT(shget(strmap, strkey(i)) == -2 );

-      else       STBDS_ASSERT(shget(strmap, strkey(i)) == i*3);

-    for (i=0; i < testsize; i+=1)

-      shdel(strmap, strkey(i)); // delete the rest of the entries

-    for (i=0; i < testsize; i+=1)

-      STBDS_ASSERT(shget(strmap, strkey(i)) == -2 );

-    shfree(strmap);

-  }

-  {

-    struct { char *key; char value; } *hash = NULL;

-    char name[4] = "jen";

-    shput(hash, "bob"   , 'h');

-    shput(hash, "sally" , 'e');

-    shput(hash, "fred"  , 'l');

-    shput(hash, "jen"   , 'x');

-    shput(hash, "doug"  , 'o');

-    shput(hash, name    , 'l');

-    shfree(hash);

-  }

-  for (i=0; i < testsize; i += 2) {

-    stbds_struct s = { i,i*2,i*3,i*4 };

-    hmput(map, s, i*5);

-  }

-  for (i=0; i < testsize; i += 1) {

-    stbds_struct s = { i,i*2,i*3  ,i*4 };

-    stbds_struct t = { i,i*2,i*3+1,i*4 };

-    if (i & 1) STBDS_ASSERT(hmget(map, s) == 0);

-    else       STBDS_ASSERT(hmget(map, s) == i*5);

-    if (i & 1) STBDS_ASSERT(hmget_ts(map, s, temp) == 0);

-    else       STBDS_ASSERT(hmget_ts(map, s, temp) == i*5);

-    //STBDS_ASSERT(hmget(map, t.key) == 0);

-  }

-  for (i=0; i < testsize; i += 2) {

-    stbds_struct s = { i,i*2,i*3,i*4 };

-    hmputs(map2, s);

-  }

-  hmfree(map);

-  for (i=0; i < testsize; i += 1) {

-    stbds_struct s = { i,i*2,i*3,i*4 };

-    stbds_struct t = { i,i*2,i*3+1,i*4 };

-    if (i & 1) STBDS_ASSERT(hmgets(map2, s.key).d == 0);

-    else       STBDS_ASSERT(hmgets(map2, s.key).d == i*4);

-    //STBDS_ASSERT(hmgetp(map2, t.key) == 0);

-  }

-  hmfree(map2);

-  for (i=0; i < testsize; i += 2) {

-    stbds_struct2 s = { { i,i*2 }, i*3,i*4, i*5 };

-    hmputs(map3, s);

-  }

-  for (i=0; i < testsize; i += 1) {

-    stbds_struct2 s = { { i,i*2}, i*3, i*4, i*5 };

-    stbds_struct2 t = { { i,i*2}, i*3+1, i*4, i*5 };

-    if (i & 1) STBDS_ASSERT(hmgets(map3, s.key).d == 0);

-    else       STBDS_ASSERT(hmgets(map3, s.key).d == i*5);

-    //STBDS_ASSERT(hmgetp(map3, t.key) == 0);

-  }

-#endif

-}

-#endif

-/*

-------------------------------------------------------------------------------

-This software is available under 2 licenses -- choose whichever you prefer.

-------------------------------------------------------------------------------

-ALTERNATIVE A - MIT License

-Copyright (c) 2019 Sean Barrett

-Permission is hereby granted, free of charge, to any person obtaining a copy of

-this software and associated documentation files (the "Software"), to deal in

-the Software without restriction, including without limitation the rights to

-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies

-of the Software, and to permit persons to whom the Software is furnished to do

-so, subject to the following conditions:

-The above copyright notice and this permission notice shall be included in all

-copies or substantial portions of the Software.

-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

-SOFTWARE.

-------------------------------------------------------------------------------

-ALTERNATIVE B - Public Domain (www.unlicense.org)

-This is free and unencumbered software released into the public domain.

-Anyone is free to copy, modify, publish, use, compile, sell, or distribute this

-software, either in source code form or as a compiled binary, for any purpose,

-commercial or non-commercial, and by any means.

-In jurisdictions that recognize copyright laws, the author or authors of this

-software dedicate any and all copyright interest in the software to the public

-domain. We make this dedication for the benefit of the public at large and to

-the detriment of our heirs and successors. We intend this dedication to be an

-overt act of relinquishment in perpetuity of all present and future rights to

-this software under copyright law.

-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

-AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN

-ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION

-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

-------------------------------------------------------------------------------

-*/

--- a/SDL_Examples/include/stb_image.h

+++ /dev/null

@@ -1,8002 +1,0 @@

-/* stb_image - v2.25 - public domain image loader - http://nothings.org/stb

-								  no warranty implied; use at your own risk

-   Do this:

-	  #define STB_IMAGE_IMPLEMENTATION

-   before you include this file in *one* C or C++ file to create the

-implementation.

-   // i.e. it should look like this:

-   #include ...

-   #include ...

-   #include ...

-   #define STB_IMAGE_IMPLEMENTATION

-   #include "stb_image.h"

-   You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.

-   And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using

-malloc,realloc,free

-   QUICK NOTES:

-	  Primarily of interest to game developers and other people who can

-		  avoid problematic images and only need the trivial interface

-	  JPEG baseline & progressive (12 bpc/arithmetic not supported, same as

-stock IJG lib) PNG 1/2/4/8/16-bit-per-channel

-	  TGA (not sure what subset, if a subset)

-	  BMP non-1bpp, non-RLE

-	  PSD (composited view only, no extra channels, 8/16 bit-per-channel)

-	  GIF (*comp always reports as 4-channel)

-	  HDR (radiance rgbE format)

-	  PIC (Softimage PIC)

-	  PNM (PPM and PGM binary only)

-	  Animated GIF still needs a proper API, but here's one way to do it:

-		  http://gist.github.com/urraka/685d9a6340b26b830d49

-	  - decode from memory or through FILE (define STBI_NO_STDIO to remove code)

-	  - decode from arbitrary I/O callbacks

-	  - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)

-   Full documentation under "DOCUMENTATION" below.

-LICENSE

-  See end of file for license information.

-RECENT REVISION HISTORY:

-	  2.25  (2020-02-02) fix warnings

-	  2.24  (2020-02-02) fix warnings; thread-local failure_reason and

-flip_vertically 2.23  (2019-08-11) fix clang static analysis warning 2.22

-(2019-03-04) gif fixes, fix warnings 2.21  (2019-02-25) fix typo in comment 2.20

-(2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs

-	  2.19  (2018-02-11) fix warning

-	  2.18  (2018-01-30) fix warnings

-	  2.17  (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings

-	  2.16  (2017-07-23) all functions have 16-bit variants; optimizations;

-bugfixes 2.15  (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE

-detection on GCC 2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for

-Imagenet JPGs 2.13  (2016-12-04) experimental 16-bit API, only for PNG so far;

-fixes 2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes 2.11

-(2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64 RGB-format JPEG; remove

-white matting in PSD; allocate large structures on the stack; correct channel

-count for PNG & BMP 2.10  (2016-01-22) avoid warning introduced in 2.09 2.09

-(2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED

-   See end of file for full revision history.

- ============================    Contributors    =========================

- Image formats                          Extensions, features

-	Sean Barrett (jpeg, png, bmp)          Jetro Lauha (stbi_info)

-	Nicolas Schulz (hdr, psd)              Martin "SpartanJ" Golini (stbi_info)

-	Jonathan Dummer (tga)                  James "moose2000" Brown (iPhone PNG)

-	Jean-Marc Lienher (gif)                Ben "Disch" Wenger (io callbacks)

-	Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG)

-	Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip)

-	Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD)

-	github:urraka (animated gif)           Junggon Kim (PNM comments)

-	Christopher Forseth (animated gif)     Daniel Gibson (16-bit TGA)

-										   socks-the-fox (16-bit PNG)

-										   Jeremy Sawicki (handle all ImageNet

-JPGs) Optimizations & bugfixes                  Mikhail Morozov (1-bit BMP)

-	Fabian "ryg" Giesen                    Anael Seghezzi (is-16-bit query)

-	Arseny Kapoulkine

-	John-Mark Allen

-	Carmelo J Fdez-Aguera

- Bug & warning fixes

-	Marc LeBlanc            David Woo          Guillaume George   Martins

-Mozeiko Christpher Lloyd        Jerry Jansson      Joseph Thomson     Phil

-Jordan Dave Moore              Roy Eltham         Hayaki Saito       Nathan Reed

-	Won Chun                Luke Graham        Johan Duparc       Nick Verigakis

-	the Horde3D community   Thomas Ruf         Ronny Chevalier    github:rlyeh

-	Janez Zemva             John Bartholomew   Michal Cichon github:romigrou

-	Jonathan Blow           Ken Hamada         Tero Hanninen      github:svdijk

-	Laurent Gomila          Cort Stratton      Sergio Gonzalez    github:snagar

-	Aruelien Pocheville     Thibault Reuille   Cass Everitt       github:Zelex

-	Ryamond Barbiero        Paul Du Bois       Engin Manap        github:grim210

-	Aldo Culquicondor       Philipp Wiesemann  Dale Weiler        github:sammyhw

-	Oriol Ferrer Mesia      Josh Tobin         Matthew Gregan     github:phprus

-	Julian Raschke          Gregory Mullen     Baldur Karlsson

-github:poppolopoppo Christian Floisand      Kevin Schmidt      JR Smith

-github:darealshinji Brad Weinberger         Matvey Cherevko

-github:Michaelangel007 Blazej Dariusz Roszkowski                  Alexander

-Veselov

-*/

-#ifndef STBI_INCLUDE_STB_IMAGE_H

-#define STBI_INCLUDE_STB_IMAGE_H

-// DOCUMENTATION

-//

-// Limitations:

-//    - no 12-bit-per-channel JPEG

-//    - no JPEGs with arithmetic coding

-//    - GIF always returns *comp=4

-//

-// Basic usage (see HDR discussion below for HDR usage):

-//    int x,y,n;

-//    unsigned char *data = stbi_load(filename, &x, &y, &n, 0);

-//    // ... process data if not NULL ...

-//    // ... x = width, y = height, n = # 8-bit components per pixel ...

-//    // ... replace '0' with '1'..'4' to force that many components per pixel

-//    // ... but 'n' will always be the number that it would have been if you

-//    said 0 stbi_image_free(data)

-//

-// Standard parameters:

-//    int *x                 -- outputs image width in pixels

-//    int *y                 -- outputs image height in pixels

-//    int *channels_in_file  -- outputs # of image components in image file

-//    int desired_channels   -- if non-zero, # of image components requested in

-//    result

-//

-// The return value from an image loader is an 'unsigned char *' which points

-// to the pixel data, or NULL on an allocation failure or if the image is

-// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,

-// with each pixel consisting of N interleaved 8-bit components; the first

-// pixel pointed to is top-left-most in the image. There is no padding between

-// image scanlines or between pixels, regardless of format. The number of

-// components N is 'desired_channels' if desired_channels is non-zero, or

-// *channels_in_file otherwise. If desired_channels is non-zero,

-// *channels_in_file has the number of components that _would_ have been

-// output otherwise. E.g. if you set desired_channels to 4, you will always

-// get RGBA output, but you can check *channels_in_file to see if it's trivially

-// opaque because e.g. there were only 3 channels in the source image.

-//

-// An output image with N components has the following components interleaved

-// in this order in each pixel:

-//

-//     N=#comp     components

-//       1           grey

-//       2           grey, alpha

-//       3           red, green, blue

-//       4           red, green, blue, alpha

-//

-// If image loading fails for any reason, the return value will be NULL,

-// and *x, *y, *channels_in_file will be unchanged. The function

-// stbi_failure_reason() can be queried for an extremely brief, end-user

-// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS

-// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get

-// slightly more user-friendly ones.

-//

-// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.

-//

-// ===========================================================================

-//

-// UNICODE:

-//

-//   If compiling for Windows and you wish to use Unicode filenames, compile

-//   with

-//       #define STBI_WINDOWS_UTF8

-//   and pass utf8-encoded filenames. Call stbi_convert_wchar_to_utf8 to convert

-//   Windows wchar_t filenames to utf8.

-//

-// ===========================================================================

-//

-// Philosophy

-//

-// stb libraries are designed with the following priorities:

-//

-//    1. easy to use

-//    2. easy to maintain

-//    3. good performance

-//

-// Sometimes I let "good performance" creep up in priority over "easy to

-// maintain", and for best performance I may provide less-easy-to-use APIs that

-// give higher performance, in addition to the easy-to-use ones. Nevertheless,

-// it's important to keep in mind that from the standpoint of you, a client of

-// this library, all you care about is #1 and #3, and stb libraries DO NOT

-// emphasize #3 above all.

-//

-// Some secondary priorities arise directly from the first two, some of which

-// provide more explicit reasons why performance can't be emphasized.

-//

-//    - Portable ("ease of use")

-//    - Small source code footprint ("easy to maintain")

-//    - No dependencies ("ease of use")

-//

-// ===========================================================================

-//

-// I/O callbacks

-//

-// I/O callbacks allow you to read from arbitrary sources, like packaged

-// files or some other source. Data read from callbacks are processed

-// through a small internal buffer (currently 128 bytes) to try to reduce

-// overhead.

-//

-// The three functions you must define are "read" (reads some bytes of data),

-// "skip" (skips some bytes of data), "eof" (reports if the stream is at the

-// end).

-//

-// ===========================================================================

-//

-// SIMD support

-//

-// The JPEG decoder will try to automatically use SIMD kernels on x86 when

-// supported by the compiler. For ARM Neon support, you must explicitly

-// request it.

-//

-// (The old do-it-yourself SIMD API is no longer supported in the current

-// code.)

-//

-// On x86, SSE2 will automatically be used when available based on a run-time

-// test; if not, the generic C versions are used as a fall-back. On ARM targets,

-// the typical path is to have separate builds for NEON and non-NEON devices

-// (at least this is true for iOS and Android). Therefore, the NEON support is

-// toggled by a build flag: define STBI_NEON to get NEON loops.

-//

-// If for some reason you do not want to use any of SIMD code, or if

-// you have issues compiling it, you can disable it entirely by

-// defining STBI_NO_SIMD.

-//

-// ===========================================================================

-//

-// HDR image support   (disable by defining STBI_NO_HDR)

-//

-// stb_image supports loading HDR images in general, and currently the Radiance

-// .HDR file format specifically. You can still load any file through the

-// existing interface; if you attempt to load an HDR file, it will be

-// automatically remapped to LDR, assuming gamma 2.2 and an arbitrary scale

-// factor defaulting to 1; both of these constants can be reconfigured through

-// this interface:

-//

-//     stbi_hdr_to_ldr_gamma(2.2f);

-//     stbi_hdr_to_ldr_scale(1.0f);

-//

-// (note, do not use _inverse_ constants; stbi_image will invert them

-// appropriately).

-//

-// Additionally, there is a new, parallel interface for loading files as

-// (linear) floats to preserve the full dynamic range:

-//

-//    float *data = stbi_loadf(filename, &x, &y, &n, 0);

-//

-// If you load LDR images through this interface, those images will

-// be promoted to floating point values, run through the inverse of

-// constants corresponding to the above:

-//

-//     stbi_ldr_to_hdr_scale(1.0f);

-//     stbi_ldr_to_hdr_gamma(2.2f);

-//

-// Finally, given a filename (or an open file or memory block--see header

-// file for details) containing image data, you can query for the "most

-// appropriate" interface to use (that is, whether the image is HDR or

-// not), using:

-//

-//     stbi_is_hdr(char *filename);

-//

-// ===========================================================================

-//

-// iPhone PNG support:

-//

-// By default we convert iphone-formatted PNGs back to RGB, even though

-// they are internally encoded differently. You can disable this conversion

-// by calling stbi_convert_iphone_png_to_rgb(0), in which case

-// you will always just get the native iphone "format" through (which

-// is BGR stored in RGB).

-//

-// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per

-// pixel to remove any premultiplied alpha *only* if the image file explicitly

-// says there's premultiplied data (currently only happens in iPhone images,

-// and only if iPhone convert-to-rgb processing is on).

-//

-// ===========================================================================

-//

-// ADDITIONAL CONFIGURATION

-//

-//  - You can suppress implementation of any of the decoders to reduce

-//    your code footprint by #defining one or more of the following

-//    symbols before creating the implementation.

-//

-//        STBI_NO_JPEG

-//        STBI_NO_PNG

-//        STBI_NO_BMP

-//        STBI_NO_PSD

-//        STBI_NO_TGA

-//        STBI_NO_GIF

-//        STBI_NO_HDR

-//        STBI_NO_PIC

-//        STBI_NO_PNM   (.ppm and .pgm)

-//

-//  - You can request *only* certain decoders and suppress all other ones

-//    (this will be more forward-compatible, as addition of new decoders

-//    doesn't require you to disable them explicitly):

-//

-//        STBI_ONLY_JPEG

-//        STBI_ONLY_PNG

-//        STBI_ONLY_BMP

-//        STBI_ONLY_PSD

-//        STBI_ONLY_TGA

-//        STBI_ONLY_GIF

-//        STBI_ONLY_HDR

-//        STBI_ONLY_PIC

-//        STBI_ONLY_PNM   (.ppm and .pgm)

-//

-//   - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still

-//     want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB

-//

-#ifndef STBI_NO_STDIO

-#include <stdio.h>

-#endif // STBI_NO_STDIO

-#define STBI_VERSION 1

-enum {

-	STBI_default = 0, // only used for desired_channels

-	STBI_grey = 1,

-	STBI_grey_alpha = 2,

-	STBI_rgb = 3,

-	STBI_rgb_alpha = 4

-};

-#include <stdlib.h>

-typedef unsigned char stbi_uc;

-typedef unsigned short stbi_us;

-#ifdef __cplusplus

-extern "C" {

-#endif

-#ifndef STBIDEF

-#ifdef STB_IMAGE_STATIC

-#define STBIDEF static

-#else

-#define STBIDEF extern

-#endif

-#endif

-//////////////////////////////////////////////////////////////////////////////

-//

-// PRIMARY API - works on images of any type

-//

-//

-// load image by filename, open file, or memory buffer

-//

-typedef struct {

-	int (*read)(void* user, char* data,

-				int size);			 // fill 'data' with 'size' bytes.  return number of

-									 // bytes actually read

-	void (*skip)(void* user, int n); // skip the next 'n' bytes, or 'unget' the

-									 // last -n bytes if negative

-	int (*eof)(void* user);			 // returns nonzero if we are at end of file/data

-} stbi_io_callbacks;

-////////////////////////////////////

-//

-// 8-bits-per-channel interface

-//

-STBIDEF stbi_uc* stbi_load_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* channels_in_file, int desired_channels);

-STBIDEF stbi_uc* stbi_load_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* channels_in_file, int desired_channels);

-#ifndef STBI_NO_STDIO

-STBIDEF stbi_uc* stbi_load(char const* filename, int* x, int* y, int* channels_in_file, int desired_channels);

-STBIDEF stbi_uc* stbi_load_from_file(FILE* f, int* x, int* y, int* channels_in_file, int desired_channels);

-// for stbi_load_from_file, file pointer is left pointing immediately after

-// image

-#endif

-#ifndef STBI_NO_GIF

-STBIDEF stbi_uc* stbi_load_gif_from_memory(stbi_uc const* buffer, int len, int** delays, int* x, int* y, int* z, int* comp, int req_comp);

-#endif

-#ifdef STBI_WINDOWS_UTF8

-STBIDEF int stbi_convert_wchar_to_utf8(char* buffer, size_t bufferlen, const wchar_t* input);

-#endif

-////////////////////////////////////

-//

-// 16-bits-per-channel interface

-//

-STBIDEF stbi_us* stbi_load_16_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* channels_in_file, int desired_channels);

-STBIDEF stbi_us* stbi_load_16_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* channels_in_file, int desired_channels);

-#ifndef STBI_NO_STDIO

-STBIDEF stbi_us* stbi_load_16(char const* filename, int* x, int* y, int* channels_in_file, int desired_channels);

-STBIDEF stbi_us* stbi_load_from_file_16(FILE* f, int* x, int* y, int* channels_in_file, int desired_channels);

-#endif

-////////////////////////////////////

-//

-// float-per-channel interface

-//

-#ifndef STBI_NO_LINEAR

-STBIDEF float* stbi_loadf_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* channels_in_file, int desired_channels);

-STBIDEF float* stbi_loadf_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* channels_in_file, int desired_channels);

-#ifndef STBI_NO_STDIO

-STBIDEF float* stbi_loadf(char const* filename, int* x, int* y, int* channels_in_file, int desired_channels);

-STBIDEF float* stbi_loadf_from_file(FILE* f, int* x, int* y, int* channels_in_file, int desired_channels);

-#endif

-#endif

-#ifndef STBI_NO_HDR

-STBIDEF void stbi_hdr_to_ldr_gamma(float gamma);

-STBIDEF void stbi_hdr_to_ldr_scale(float scale);

-#endif // STBI_NO_HDR

-#ifndef STBI_NO_LINEAR

-STBIDEF void stbi_ldr_to_hdr_gamma(float gamma);

-STBIDEF void stbi_ldr_to_hdr_scale(float scale);

-#endif // STBI_NO_LINEAR

-// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR

-STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const* clbk, void* user);

-STBIDEF int stbi_is_hdr_from_memory(stbi_uc const* buffer, int len);

-#ifndef STBI_NO_STDIO

-STBIDEF int stbi_is_hdr(char const* filename);

-STBIDEF int stbi_is_hdr_from_file(FILE* f);

-#endif // STBI_NO_STDIO

-// get a VERY brief reason for failure

-// on most compilers (and ALL modern mainstream compilers) this is threadsafe

-STBIDEF const char* stbi_failure_reason(void);

-// free the loaded image -- this is just free()

-STBIDEF void stbi_image_free(void* retval_from_stbi_load);

-// get image dimensions & components without fully decoding

-STBIDEF int stbi_info_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* comp);

-STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* comp);

-STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const* buffer, int len);

-STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const* clbk, void* user);

-#ifndef STBI_NO_STDIO

-STBIDEF int stbi_info(char const* filename, int* x, int* y, int* comp);

-STBIDEF int stbi_info_from_file(FILE* f, int* x, int* y, int* comp);

-STBIDEF int stbi_is_16_bit(char const* filename);

-STBIDEF int stbi_is_16_bit_from_file(FILE* f);

-#endif

-// for image formats that explicitly notate that they have premultiplied alpha,

-// we just return the colors as stored in the file. set this flag to force

-// unpremultiplication. results are undefined if the unpremultiply overflow.

-STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);

-// indicate whether we should process iphone images back to canonical format,

-// or just pass them through "as-is"

-STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);

-// flip the image vertically, so the first pixel in the output array is the

-// bottom left

-STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);

-// as above, but only applies to images loaded on the thread that calls the

-// function this function is only available if your compiler supports

-// thread-local variables; calling it will fail to link if your compiler doesn't

-STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip);

-// ZLIB client - used by PNG, available for other purposes

-STBIDEF char* stbi_zlib_decode_malloc_guesssize(const char* buffer, int len, int initial_size, int* outlen);

-STBIDEF char* stbi_zlib_decode_malloc_guesssize_headerflag(const char* buffer, int len, int initial_size, int* outlen, int parse_header);

-STBIDEF char* stbi_zlib_decode_malloc(const char* buffer, int len, int* outlen);

-STBIDEF int stbi_zlib_decode_buffer(char* obuffer, int olen, const char* ibuffer, int ilen);

-STBIDEF char* stbi_zlib_decode_noheader_malloc(const char* buffer, int len, int* outlen);

-STBIDEF int stbi_zlib_decode_noheader_buffer(char* obuffer, int olen, const char* ibuffer, int ilen);

-#ifdef __cplusplus

-}

-#endif

-//

-//

-////   end header file   /////////////////////////////////////////////////////

-#endif // STBI_INCLUDE_STB_IMAGE_H

-#ifdef STB_IMAGE_IMPLEMENTATION

-#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) ||                         \

-	defined(STBI_ONLY_PSD) || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) || defined(STBI_ONLY_ZLIB)

-#ifndef STBI_ONLY_JPEG

-#define STBI_NO_JPEG

-#endif

-#ifndef STBI_ONLY_PNG

-#define STBI_NO_PNG

-#endif

-#ifndef STBI_ONLY_BMP

-#define STBI_NO_BMP

-#endif

-#ifndef STBI_ONLY_PSD

-#define STBI_NO_PSD

-#endif

-#ifndef STBI_ONLY_TGA

-#define STBI_NO_TGA

-#endif

-#ifndef STBI_ONLY_GIF

-#define STBI_NO_GIF

-#endif

-#ifndef STBI_ONLY_HDR

-#define STBI_NO_HDR

-#endif

-#ifndef STBI_ONLY_PIC

-#define STBI_NO_PIC

-#endif

-#ifndef STBI_ONLY_PNM

-#define STBI_NO_PNM

-#endif

-#endif

-#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)

-#define STBI_NO_ZLIB

-#endif

-#include <limits.h>

-#include <stdarg.h>

-#include <stddef.h> // ptrdiff_t on osx

-#include <stdlib.h>

-#include <string.h>

-#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)

-#include <math.h> // ldexp, pow

-#endif

-#ifndef STBI_NO_STDIO

-#include <stdio.h>

-#endif

-#ifndef STBI_ASSERT

-#include <assert.h>

-#define STBI_ASSERT(x) assert(x)

-#endif

-#ifdef __cplusplus

-#define STBI_EXTERN extern "C"

-#else

-#define STBI_EXTERN extern

-#endif

-#ifndef _MSC_VER

-#ifdef __cplusplus

-#define stbi_inline inline

-#else

-#define stbi_inline

-#endif

-#else

-#define stbi_inline __forceinline

-#endif

-#ifndef STBI_NO_THREAD_LOCALS

-#if defined(__cplusplus) && __cplusplus >= 201103L

-#define STBI_THREAD_LOCAL thread_local

-#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L

-#define STBI_THREAD_LOCAL _Thread_local

-#elif defined(__GNUC__)

-#define STBI_THREAD_LOCAL __thread

-#elif defined(_MSC_VER)

-#define STBI_THREAD_LOCAL __declspec(thread)

-#endif

-#endif

-#ifdef _MSC_VER

-typedef unsigned short stbi__uint16;

-typedef signed short stbi__int16;

-typedef unsigned int stbi__uint32;

-typedef signed int stbi__int32;

-#else

-#include <stdint.h>

-typedef uint16_t stbi__uint16;

-typedef int16_t stbi__int16;

-typedef uint32_t stbi__uint32;

-typedef int32_t stbi__int32;

-#endif

-// should produce compiler error if size is wrong

-typedef unsigned char validate_uint32[sizeof(stbi__uint32) == 4 ? 1 : -1];

-#ifdef _MSC_VER

-#define STBI_NOTUSED(v) (void)(v)

-#else

-#define STBI_NOTUSED(v) (void)sizeof(v)

-#endif

-#ifdef _MSC_VER

-#define STBI_HAS_LROTL

-#endif

-#ifdef STBI_HAS_LROTL

-#define stbi_lrot(x, y) _lrotl(x, y)

-#else

-#define stbi_lrot(x, y) (((x) << (y)) | ((x) >> (32 - (y))))

-#endif

-#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))

-// ok

-#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)

-// ok

-#else

-#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."

-#endif

-#ifndef STBI_MALLOC

-#define STBI_MALLOC(sz) malloc(sz)

-#define STBI_REALLOC(p, newsz) realloc(p, newsz)

-#define STBI_FREE(p) free(p)

-#endif

-#ifndef STBI_REALLOC_SIZED

-#define STBI_REALLOC_SIZED(p, oldsz, newsz) STBI_REALLOC(p, newsz)

-#endif

-// x86/x64 detection

-#if defined(__x86_64__) || defined(_M_X64)

-#define STBI__X64_TARGET

-#elif defined(__i386) || defined(_M_IX86)

-#define STBI__X86_TARGET

-#endif

-#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)

-// gcc doesn't support sse2 intrinsics unless you compile with -msse2,

-// which in turn means it gets to use SSE2 everywhere. This is unfortunate,

-// but previous attempts to provide the SSE2 functions with runtime

-// detection caused numerous issues. The way architecture extensions are

-// exposed in GCC/Clang is, sadly, not really suited for one-file libs.

-// New behavior: if compiled with -msse2, we use SSE2 without any

-// detection; if not, we don't use it at all.

-#define STBI_NO_SIMD

-#endif

-#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)

-// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid

-// STBI__X64_TARGET

-//

-// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the

-// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.

-// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not

-// simultaneously enabling "-mstackrealign".

-//

-// See https://github.com/nothings/stb/issues/81 for more information.

-//

-// So default to no SSE2 on 32-bit MinGW. If you've read this far and added

-// -mstackrealign to your build settings, feel free to #define

-// STBI_MINGW_ENABLE_SSE2.

-#define STBI_NO_SIMD

-#endif

-#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))

-#define STBI_SSE2

-#include <emmintrin.h>

-#ifdef _MSC_VER

-#if _MSC_VER >= 1400 // not VC6

-#include <intrin.h>  // __cpuid

-static int stbi__cpuid3(void) {

-	int info[4];

-	__cpuid(info, 1);

-	return info[3];

-}

-#else

-static int stbi__cpuid3(void) {

-	int res;

-	__asm {

-      mov  eax,1

-      cpuid

-      mov  res,edx

-	}

-	return res;

-}

-#endif

-#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name

-#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)

-static int stbi__sse2_available(void) {

-	int info3 = stbi__cpuid3();

-	return ((info3 >> 26) & 1) != 0;

-}

-#endif

-#else // assume GCC-style if not VC++

-#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))

-#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)

-static int stbi__sse2_available(void) {

-	// If we're even attempting to compile this on GCC/Clang, that means

-	// -msse2 is on, which means the compiler is allowed to use SSE2

-	// instructions at will, and so are we.

-	return 1;

-}

-#endif

-#endif

-#endif

-// ARM NEON

-#if defined(STBI_NO_SIMD) && defined(STBI_NEON)

-#undef STBI_NEON

-#endif

-#ifdef STBI_NEON

-#include <arm_neon.h>

-// assume GCC or Clang on ARM targets

-#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))

-#endif

-#ifndef STBI_SIMD_ALIGN

-#define STBI_SIMD_ALIGN(type, name) type name

-#endif

-///////////////////////////////////////////////

-//

-//  stbi__context struct and start_xxx functions

-// stbi__context structure is our basic context used by all images, so it

-// contains all the IO context, plus some basic image information

-typedef struct {

-	stbi__uint32 img_x, img_y;

-	int img_n, img_out_n;

-	stbi_io_callbacks io;

-	void* io_user_data;

-	int read_from_callbacks;

-	int buflen;

-	stbi_uc buffer_start[128];

-	stbi_uc *img_buffer, *img_buffer_end;

-	stbi_uc *img_buffer_original, *img_buffer_original_end;

-} stbi__context;

-static void stbi__refill_buffer(stbi__context* s);

-// initialize a memory-decode context

-static void stbi__start_mem(stbi__context* s, stbi_uc const* buffer, int len) {

-	s->io.read = NULL;

-	s->read_from_callbacks = 0;

-	s->img_buffer = s->img_buffer_original = (stbi_uc*)buffer;

-	s->img_buffer_end = s->img_buffer_original_end = (stbi_uc*)buffer + len;

-}

-// initialize a callback-based context

-static void stbi__start_callbacks(stbi__context* s, stbi_io_callbacks* c, void* user) {

-	s->io = *c;

-	s->io_user_data = user;

-	s->buflen = sizeof(s->buffer_start);

-	s->read_from_callbacks = 1;

-	s->img_buffer_original = s->buffer_start;

-	stbi__refill_buffer(s);

-	s->img_buffer_original_end = s->img_buffer_end;

-}

-#ifndef STBI_NO_STDIO

-static int stbi__stdio_read(void* user, char* data, int size) { return (int)fread(data, 1, size, (FILE*)user); }

-static void stbi__stdio_skip(void* user, int n) { fseek((FILE*)user, n, SEEK_CUR); }

-static int stbi__stdio_eof(void* user) { return feof((FILE*)user); }

-static stbi_io_callbacks stbi__stdio_callbacks = {

-	stbi__stdio_read,

-	stbi__stdio_skip,

-	stbi__stdio_eof,

-};

-static void stbi__start_file(stbi__context* s, FILE* f) { stbi__start_callbacks(s, &stbi__stdio_callbacks, (void*)f); }

-// static void stop_file(stbi__context *s) { }

-#endif // !STBI_NO_STDIO

-static void stbi__rewind(stbi__context* s) {

-	// conceptually rewind SHOULD rewind to the beginning of the stream,

-	// but we just rewind to the beginning of the initial buffer, because

-	// we only use it after doing 'test', which only ever looks at at most 92

-	// bytes

-	s->img_buffer = s->img_buffer_original;

-	s->img_buffer_end = s->img_buffer_original_end;

-}

-enum { STBI_ORDER_RGB, STBI_ORDER_BGR };

-typedef struct {

-	int bits_per_channel;

-	int num_channels;

-	int channel_order;

-} stbi__result_info;

-#ifndef STBI_NO_JPEG

-static int stbi__jpeg_test(stbi__context* s);

-static void* stbi__jpeg_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);

-static int stbi__jpeg_info(stbi__context* s, int* x, int* y, int* comp);

-#endif

-#ifndef STBI_NO_PNG

-static int stbi__png_test(stbi__context* s);

-static void* stbi__png_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);

-static int stbi__png_info(stbi__context* s, int* x, int* y, int* comp);

-static int stbi__png_is16(stbi__context* s);

-#endif

-#ifndef STBI_NO_BMP

-static int stbi__bmp_test(stbi__context* s);

-static void* stbi__bmp_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);

-static int stbi__bmp_info(stbi__context* s, int* x, int* y, int* comp);

-#endif

-#ifndef STBI_NO_TGA

-static int stbi__tga_test(stbi__context* s);

-static void* stbi__tga_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);

-static int stbi__tga_info(stbi__context* s, int* x, int* y, int* comp);

-#endif

-#ifndef STBI_NO_PSD

-static int stbi__psd_test(stbi__context* s);

-static void* stbi__psd_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri, int bpc);

-static int stbi__psd_info(stbi__context* s, int* x, int* y, int* comp);

-static int stbi__psd_is16(stbi__context* s);

-#endif

-#ifndef STBI_NO_HDR

-static int stbi__hdr_test(stbi__context* s);

-static float* stbi__hdr_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);

-static int stbi__hdr_info(stbi__context* s, int* x, int* y, int* comp);

-#endif

-#ifndef STBI_NO_PIC

-static int stbi__pic_test(stbi__context* s);

-static void* stbi__pic_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);

-static int stbi__pic_info(stbi__context* s, int* x, int* y, int* comp);

-#endif

-#ifndef STBI_NO_GIF

-static int stbi__gif_test(stbi__context* s);

-static void* stbi__gif_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);

-static void* stbi__load_gif_main(stbi__context* s, int** delays, int* x, int* y, int* z, int* comp, int req_comp);

-static int stbi__gif_info(stbi__context* s, int* x, int* y, int* comp);

-#endif

-#ifndef STBI_NO_PNM

-static int stbi__pnm_test(stbi__context* s);

-static void* stbi__pnm_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);

-static int stbi__pnm_info(stbi__context* s, int* x, int* y, int* comp);

-#endif

-static

-#ifdef STBI_THREAD_LOCAL

-	STBI_THREAD_LOCAL

-#endif

-	const char* stbi__g_failure_reason;

-STBIDEF const char* stbi_failure_reason(void) { return stbi__g_failure_reason; }

-#ifndef STBI_NO_FAILURE_STRINGS

-static int stbi__err(const char* str) {

-	stbi__g_failure_reason = str;

-	return 0;

-}

-#endif

-static void* stbi__malloc(size_t size) { return STBI_MALLOC(size); }

-// stb_image uses ints pervasively, including for offset calculations.

-// therefore the largest decoded image size we can support with the

-// current code, even on 64-bit targets, is INT_MAX. this is not a

-// significant limitation for the intended use case.

-//

-// we do, however, need to make sure our size calculations don't

-// overflow. hence a few helper functions for size calculations that

-// multiply integers together, making sure that they're non-negative

-// and no overflow occurs.

-// return 1 if the sum is valid, 0 on overflow.

-// negative terms are considered invalid.

-static int stbi__addsizes_valid(int a, int b) {

-	if (b < 0)

-		return 0;

-	// now 0 <= b <= INT_MAX, hence also

-	// 0 <= INT_MAX - b <= INTMAX.

-	// And "a + b <= INT_MAX" (which might overflow) is the

-	// same as a <= INT_MAX - b (no overflow)

-	return a <= INT_MAX - b;

-}

-// returns 1 if the product is valid, 0 on overflow.

-// negative factors are considered invalid.

-static int stbi__mul2sizes_valid(int a, int b) {

-	if (a < 0 || b < 0)

-		return 0;

-	if (b == 0)

-		return 1; // mul-by-0 is always safe

-	// portable way to check for no overflows in a*b

-	return a <= INT_MAX / b;

-}

-#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)

-// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow

-static int stbi__mad2sizes_valid(int a, int b, int add) { return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a * b, add); }

-#endif

-// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow

-static int stbi__mad3sizes_valid(int a, int b, int c, int add) {

-	return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a * b, c) && stbi__addsizes_valid(a * b * c, add);

-}

-// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't

-// overflow

-#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)

-static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add) {

-	return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a * b, c) && stbi__mul2sizes_valid(a * b * c, d) && stbi__addsizes_valid(a * b * c * d, add);

-}

-#endif

-#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)

-// mallocs with size overflow checking

-static void* stbi__malloc_mad2(int a, int b, int add) {

-	if (!stbi__mad2sizes_valid(a, b, add))

-		return NULL;

-	return stbi__malloc(a * b + add);

-}

-#endif

-static void* stbi__malloc_mad3(int a, int b, int c, int add) {

-	if (!stbi__mad3sizes_valid(a, b, c, add))

-		return NULL;

-	return stbi__malloc(a * b * c + add);

-}

-#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)

-static void* stbi__malloc_mad4(int a, int b, int c, int d, int add) {

-	if (!stbi__mad4sizes_valid(a, b, c, d, add))

-		return NULL;

-	return stbi__malloc(a * b * c * d + add);

-}

-#endif

-// stbi__err - error

-// stbi__errpf - error returning pointer to float

-// stbi__errpuc - error returning pointer to unsigned char

-#ifdef STBI_NO_FAILURE_STRINGS

-#define stbi__err(x, y) 0

-#elif defined(STBI_FAILURE_USERMSG)

-#define stbi__err(x, y) stbi__err(y)

-#else

-#define stbi__err(x, y) stbi__err(x)

-#endif

-#define stbi__errpf(x, y) ((float*)(size_t)(stbi__err(x, y) ? NULL : NULL))

-#define stbi__errpuc(x, y) ((unsigned char*)(size_t)(stbi__err(x, y) ? NULL : NULL))

-STBIDEF void stbi_image_free(void* retval_from_stbi_load) { STBI_FREE(retval_from_stbi_load); }

-#ifndef STBI_NO_LINEAR

-static float* stbi__ldr_to_hdr(stbi_uc* data, int x, int y, int comp);

-#endif

-#ifndef STBI_NO_HDR

-static stbi_uc* stbi__hdr_to_ldr(float* data, int x, int y, int comp);

-#endif

-static int stbi__vertically_flip_on_load_global = 0;

-STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip) { stbi__vertically_flip_on_load_global = flag_true_if_should_flip; }

-#ifndef STBI_THREAD_LOCAL

-#define stbi__vertically_flip_on_load stbi__vertically_flip_on_load_global

-#else

-static STBI_THREAD_LOCAL int stbi__vertically_flip_on_load_local, stbi__vertically_flip_on_load_set;

-STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip) {

-	stbi__vertically_flip_on_load_local = flag_true_if_should_flip;

-	stbi__vertically_flip_on_load_set = 1;

-}

-#define stbi__vertically_flip_on_load (stbi__vertically_flip_on_load_set ? stbi__vertically_flip_on_load_local : stbi__vertically_flip_on_load_global)

-#endif // STBI_THREAD_LOCAL

-static void* stbi__load_main(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri, int bpc) {

-	memset(ri, 0,

-		   sizeof(*ri));				// make sure it's initialized if we add new fields

-	ri->bits_per_channel = 8;			// default is 8 so most paths don't have to be changed

-	ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here

-										// so we can add BGR order

-	ri->num_channels = 0;

-#ifndef STBI_NO_JPEG

-	if (stbi__jpeg_test(s))

-		return stbi__jpeg_load(s, x, y, comp, req_comp, ri);

-#endif

-#ifndef STBI_NO_PNG

-	if (stbi__png_test(s))

-		return stbi__png_load(s, x, y, comp, req_comp, ri);

-#endif

-#ifndef STBI_NO_BMP

-	if (stbi__bmp_test(s))

-		return stbi__bmp_load(s, x, y, comp, req_comp, ri);

-#endif

-#ifndef STBI_NO_GIF

-	if (stbi__gif_test(s))

-		return stbi__gif_load(s, x, y, comp, req_comp, ri);

-#endif

-#ifndef STBI_NO_PSD

-	if (stbi__psd_test(s))

-		return stbi__psd_load(s, x, y, comp, req_comp, ri, bpc);

-#else

-	STBI_NOTUSED(bpc);

-#endif

-#ifndef STBI_NO_PIC

-	if (stbi__pic_test(s))

-		return stbi__pic_load(s, x, y, comp, req_comp, ri);

-#endif

-#ifndef STBI_NO_PNM

-	if (stbi__pnm_test(s))

-		return stbi__pnm_load(s, x, y, comp, req_comp, ri);

-#endif

-#ifndef STBI_NO_HDR

-	if (stbi__hdr_test(s)) {

-		float* hdr = stbi__hdr_load(s, x, y, comp, req_comp, ri);

-		return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);

-	}

-#endif

-#ifndef STBI_NO_TGA

-	// test tga last because it's a crappy test!

-	if (stbi__tga_test(s))

-		return stbi__tga_load(s, x, y, comp, req_comp, ri);

-#endif

-	return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");

-}

-static stbi_uc* stbi__convert_16_to_8(stbi__uint16* orig, int w, int h, int channels) {

-	int i;

-	int img_len = w * h * channels;

-	stbi_uc* reduced;

-	reduced = (stbi_uc*)stbi__malloc(img_len);

-	if (reduced == NULL)

-		return stbi__errpuc("outofmem", "Out of memory");

-	for (i = 0; i < img_len; ++i)

-		reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient

-													   // approx of 16->8 bit scaling

-	STBI_FREE(orig);

-	return reduced;

-}

-static stbi__uint16* stbi__convert_8_to_16(stbi_uc* orig, int w, int h, int channels) {

-	int i;

-	int img_len = w * h * channels;

-	stbi__uint16* enlarged;

-	enlarged = (stbi__uint16*)stbi__malloc(img_len * 2);

-	if (enlarged == NULL)

-		return (stbi__uint16*)stbi__errpuc("outofmem", "Out of memory");

-	for (i = 0; i < img_len; ++i)

-		enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff

-	STBI_FREE(orig);

-	return enlarged;

-}

-static void stbi__vertical_flip(void* image, int w, int h, int bytes_per_pixel) {

-	int row;

-	size_t bytes_per_row = (size_t)w * bytes_per_pixel;

-	stbi_uc temp[2048];

-	stbi_uc* bytes = (stbi_uc*)image;

-	for (row = 0; row < (h >> 1); row++) {

-		stbi_uc* row0 = bytes + row * bytes_per_row;

-		stbi_uc* row1 = bytes + (h - row - 1) * bytes_per_row;

-		// swap row0 with row1

-		size_t bytes_left = bytes_per_row;

-		while (bytes_left) {

-			size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);

-			memcpy(temp, row0, bytes_copy);

-			memcpy(row0, row1, bytes_copy);

-			memcpy(row1, temp, bytes_copy);

-			row0 += bytes_copy;

-			row1 += bytes_copy;

-			bytes_left -= bytes_copy;

-		}

-	}

-}

-#ifndef STBI_NO_GIF

-static void stbi__vertical_flip_slices(void* image, int w, int h, int z, int bytes_per_pixel) {

-	int slice;

-	int slice_size = w * h * bytes_per_pixel;

-	stbi_uc* bytes = (stbi_uc*)image;

-	for (slice = 0; slice < z; ++slice) {

-		stbi__vertical_flip(bytes, w, h, bytes_per_pixel);

-		bytes += slice_size;

-	}

-}

-#endif

-static unsigned char* stbi__load_and_postprocess_8bit(stbi__context* s, int* x, int* y, int* comp, int req_comp) {

-	stbi__result_info ri;

-	void* result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);

-	if (result == NULL)

-		return NULL;

-	if (ri.bits_per_channel != 8) {

-		STBI_ASSERT(ri.bits_per_channel == 16);

-		result = stbi__convert_16_to_8((stbi__uint16*)result, *x, *y, req_comp == 0 ? *comp : req_comp);

-		ri.bits_per_channel = 8;

-	}

-	// @TODO: move stbi__convert_format to here

-	if (stbi__vertically_flip_on_load) {

-		int channels = req_comp ? req_comp : *comp;

-		stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));

-	}

-	return (unsigned char*)result;

-}

-static stbi__uint16* stbi__load_and_postprocess_16bit(stbi__context* s, int* x, int* y, int* comp, int req_comp) {

-	stbi__result_info ri;

-	void* result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);

-	if (result == NULL)

-		return NULL;

-	if (ri.bits_per_channel != 16) {

-		STBI_ASSERT(ri.bits_per_channel == 8);

-		result = stbi__convert_8_to_16((stbi_uc*)result, *x, *y, req_comp == 0 ? *comp : req_comp);

-		ri.bits_per_channel = 16;

-	}

-	// @TODO: move stbi__convert_format16 to here

-	// @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to

-	// keep more precision

-	if (stbi__vertically_flip_on_load) {

-		int channels = req_comp ? req_comp : *comp;

-		stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));

-	}

-	return (stbi__uint16*)result;

-}

-#if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR)

-static void stbi__float_postprocess(float* result, int* x, int* y, int* comp, int req_comp) {

-	if (stbi__vertically_flip_on_load && result != NULL) {

-		int channels = req_comp ? req_comp : *comp;

-		stbi__vertical_flip(result, *x, *y, channels * sizeof(float));

-	}

-}

-#endif

-#ifndef STBI_NO_STDIO

-#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)

-STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char* str, int cbmb, wchar_t* widestr,

-																	int cchwide);

-STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t* widestr, int cchwide, char* str,

-																	int cbmb, const char* defchar, int* used_default);

-#endif

-#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)

-STBIDEF int stbi_convert_wchar_to_utf8(char* buffer, size_t bufferlen, const wchar_t* input) {

-	return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int)bufferlen, NULL, NULL);

-}

-#endif

-static FILE* stbi__fopen(char const* filename, char const* mode) {

-	FILE* f;

-#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)

-	wchar_t wMode[64];

-	wchar_t wFilename[1024];

-	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)))

-		return 0;

-	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)))

-		return 0;

-#if _MSC_VER >= 1400

-	if (0 != _wfopen_s(&f, wFilename, wMode))

-		f = 0;

-#else

-	f = _wfopen(wFilename, wMode);

-#endif

-#elif defined(_MSC_VER) && _MSC_VER >= 1400

-	if (0 != fopen_s(&f, filename, mode))

-		f = 0;

-#else

-	f = fopen(filename, mode);

-#endif

-	return f;

-}

-STBIDEF stbi_uc* stbi_load(char const* filename, int* x, int* y, int* comp, int req_comp) {

-	FILE* f = stbi__fopen(filename, "rb");

-	unsigned char* result;

-	if (!f)

-		return stbi__errpuc("can't fopen", "Unable to open file");

-	result = stbi_load_from_file(f, x, y, comp, req_comp);

-	fclose(f);

-	return result;

-}

-STBIDEF stbi_uc* stbi_load_from_file(FILE* f, int* x, int* y, int* comp, int req_comp) {

-	unsigned char* result;

-	stbi__context s;

-	stbi__start_file(&s, f);

-	result = stbi__load_and_postprocess_8bit(&s, x, y, comp, req_comp);

-	if (result) {

-		// need to 'unget' all the characters in the IO buffer

-		fseek(f, -(int)(s.img_buffer_end - s.img_buffer), SEEK_CUR);

-	}

-	return result;

-}

-STBIDEF stbi__uint16* stbi_load_from_file_16(FILE* f, int* x, int* y, int* comp, int req_comp) {

-	stbi__uint16* result;

-	stbi__context s;

-	stbi__start_file(&s, f);

-	result = stbi__load_and_postprocess_16bit(&s, x, y, comp, req_comp);

-	if (result) {

-		// need to 'unget' all the characters in the IO buffer

-		fseek(f, -(int)(s.img_buffer_end - s.img_buffer), SEEK_CUR);

-	}

-	return result;

-}

-STBIDEF stbi_us* stbi_load_16(char const* filename, int* x, int* y, int* comp, int req_comp) {

-	FILE* f = stbi__fopen(filename, "rb");

-	stbi__uint16* result;

-	if (!f)

-		return (stbi_us*)stbi__errpuc("can't fopen", "Unable to open file");

-	result = stbi_load_from_file_16(f, x, y, comp, req_comp);

-	fclose(f);

-	return result;

-}

-#endif //! STBI_NO_STDIO

-STBIDEF stbi_us* stbi_load_16_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* channels_in_file, int desired_channels) {

-	stbi__context s;

-	stbi__start_mem(&s, buffer, len);

-	return stbi__load_and_postprocess_16bit(&s, x, y, channels_in_file, desired_channels);

-}

-STBIDEF stbi_us* stbi_load_16_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* channels_in_file, int desired_channels) {

-	stbi__context s;

-	stbi__start_callbacks(&s, (stbi_io_callbacks*)clbk, user);

-	return stbi__load_and_postprocess_16bit(&s, x, y, channels_in_file, desired_channels);

-}

-STBIDEF stbi_uc* stbi_load_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* comp, int req_comp) {

-	stbi__context s;

-	stbi__start_mem(&s, buffer, len);

-	return stbi__load_and_postprocess_8bit(&s, x, y, comp, req_comp);

-}

-STBIDEF stbi_uc* stbi_load_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* comp, int req_comp) {

-	stbi__context s;

-	stbi__start_callbacks(&s, (stbi_io_callbacks*)clbk, user);

-	return stbi__load_and_postprocess_8bit(&s, x, y, comp, req_comp);

-}

-#ifndef STBI_NO_GIF

-STBIDEF stbi_uc* stbi_load_gif_from_memory(stbi_uc const* buffer, int len, int** delays, int* x, int* y, int* z, int* comp, int req_comp) {

-	unsigned char* result;

-	stbi__context s;

-	stbi__start_mem(&s, buffer, len);

-	result = (unsigned char*)stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp);

-	if (stbi__vertically_flip_on_load) {

-		stbi__vertical_flip_slices(result, *x, *y, *z, *comp);

-	}

-	return result;

-}

-#endif

-#ifndef STBI_NO_LINEAR

-static float* stbi__loadf_main(stbi__context* s, int* x, int* y, int* comp, int req_comp) {

-	unsigned char* data;

-#ifndef STBI_NO_HDR

-	if (stbi__hdr_test(s)) {

-		stbi__result_info ri;

-		float* hdr_data = stbi__hdr_load(s, x, y, comp, req_comp, &ri);

-		if (hdr_data)

-			stbi__float_postprocess(hdr_data, x, y, comp, req_comp);

-		return hdr_data;

-	}

-#endif

-	data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);

-	if (data)

-		return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);

-	return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");

-}

-STBIDEF float* stbi_loadf_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* comp, int req_comp) {

-	stbi__context s;

-	stbi__start_mem(&s, buffer, len);

-	return stbi__loadf_main(&s, x, y, comp, req_comp);

-}

-STBIDEF float* stbi_loadf_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* comp, int req_comp) {

-	stbi__context s;

-	stbi__start_callbacks(&s, (stbi_io_callbacks*)clbk, user);

-	return stbi__loadf_main(&s, x, y, comp, req_comp);

-}

-#ifndef STBI_NO_STDIO

-STBIDEF float* stbi_loadf(char const* filename, int* x, int* y, int* comp, int req_comp) {

-	float* result;

-	FILE* f = stbi__fopen(filename, "rb");

-	if (!f)

-		return stbi__errpf("can't fopen", "Unable to open file");

-	result = stbi_loadf_from_file(f, x, y, comp, req_comp);

-	fclose(f);

-	return result;

-}

-STBIDEF float* stbi_loadf_from_file(FILE* f, int* x, int* y, int* comp, int req_comp) {

-	stbi__context s;

-	stbi__start_file(&s, f);

-	return stbi__loadf_main(&s, x, y, comp, req_comp);

-}

-#endif // !STBI_NO_STDIO

-#endif // !STBI_NO_LINEAR

-// these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is

-// defined, for API simplicity; if STBI_NO_LINEAR is defined, it always

-// reports false!

-STBIDEF int stbi_is_hdr_from_memory(stbi_uc const* buffer, int len) {

-#ifndef STBI_NO_HDR

-	stbi__context s;

-	stbi__start_mem(&s, buffer, len);

-	return stbi__hdr_test(&s);

-#else

-	STBI_NOTUSED(buffer);

-	STBI_NOTUSED(len);

-	return 0;

-#endif

-}

-#ifndef STBI_NO_STDIO

-STBIDEF int stbi_is_hdr(char const* filename) {

-	FILE* f = stbi__fopen(filename, "rb");

-	int result = 0;

-	if (f) {

-		result = stbi_is_hdr_from_file(f);

-		fclose(f);

-	}

-	return result;

-}

-STBIDEF int stbi_is_hdr_from_file(FILE* f) {

-#ifndef STBI_NO_HDR

-	long pos = ftell(f);

-	int res;

-	stbi__context s;

-	stbi__start_file(&s, f);

-	res = stbi__hdr_test(&s);

-	fseek(f, pos, SEEK_SET);

-	return res;

-#else

-	STBI_NOTUSED(f);

-	return 0;

-#endif

-}

-#endif // !STBI_NO_STDIO

-STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const* clbk, void* user) {

-#ifndef STBI_NO_HDR

-	stbi__context s;

-	stbi__start_callbacks(&s, (stbi_io_callbacks*)clbk, user);

-	return stbi__hdr_test(&s);

-#else

-	STBI_NOTUSED(clbk);

-	STBI_NOTUSED(user);

-	return 0;

-#endif

-}

-#ifndef STBI_NO_LINEAR

-static float stbi__l2h_gamma = 2.2f, stbi__l2h_scale = 1.0f;

-STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }

-STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }

-#endif

-static float stbi__h2l_gamma_i = 1.0f / 2.2f, stbi__h2l_scale_i = 1.0f;

-STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1 / gamma; }

-STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1 / scale; }

-//////////////////////////////////////////////////////////////////////////////

-//

-// Common code used by all image loaders

-//

-enum { STBI__SCAN_load = 0, STBI__SCAN_type, STBI__SCAN_header };

-static void stbi__refill_buffer(stbi__context* s) {

-	int n = (s->io.read)(s->io_user_data, (char*)s->buffer_start, s->buflen);

-	if (n == 0) {

-		// at end of file, treat same as if from memory, but need to handle case

-		// where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file

-		s->read_from_callbacks = 0;

-		s->img_buffer = s->buffer_start;

-		s->img_buffer_end = s->buffer_start + 1;

-		*s->img_buffer = 0;

-	} else {

-		s->img_buffer = s->buffer_start;

-		s->img_buffer_end = s->buffer_start + n;

-	}

-}

-stbi_inline static stbi_uc stbi__get8(stbi__context* s) {

-	if (s->img_buffer < s->img_buffer_end)

-		return *s->img_buffer++;

-	if (s->read_from_callbacks) {

-		stbi__refill_buffer(s);

-		return *s->img_buffer++;

-	}

-	return 0;

-}

-#if defined(STBI_NO_JPEG) && defined(STBI_NO_HDR) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)

-// nothing

-#else

-stbi_inline static int stbi__at_eof(stbi__context* s) {

-	if (s->io.read) {

-		if (!(s->io.eof)(s->io_user_data))

-			return 0;

-		// if feof() is true, check if buffer = end

-		// special case: we've only got the special 0 character at the end

-		if (s->read_from_callbacks == 0)

-			return 1;

-	}

-	return s->img_buffer >= s->img_buffer_end;

-}

-#endif

-#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) &&           \

-	defined(STBI_NO_PIC)

-// nothing

-#else

-static void stbi__skip(stbi__context* s, int n) {

-	if (n < 0) {

-		s->img_buffer = s->img_buffer_end;

-		return;

-	}

-	if (s->io.read) {

-		int blen = (int)(s->img_buffer_end - s->img_buffer);

-		if (blen < n) {

-			s->img_buffer = s->img_buffer_end;

-			(s->io.skip)(s->io_user_data, n - blen);

-			return;

-		}

-	}

-	s->img_buffer += n;

-}

-#endif

-#if defined(STBI_NO_PNG) && defined(STBI_NO_TGA) && defined(STBI_NO_HDR) && defined(STBI_NO_PNM)

-// nothing

-#else

-static int stbi__getn(stbi__context* s, stbi_uc* buffer, int n) {

-	if (s->io.read) {

-		int blen = (int)(s->img_buffer_end - s->img_buffer);

-		if (blen < n) {

-			int res, count;

-			memcpy(buffer, s->img_buffer, blen);

-			count = (s->io.read)(s->io_user_data, (char*)buffer + blen, n - blen);

-			res = (count == (n - blen));

-			s->img_buffer = s->img_buffer_end;

-			return res;

-		}

-	}

-	if (s->img_buffer + n <= s->img_buffer_end) {

-		memcpy(buffer, s->img_buffer, n);

-		s->img_buffer += n;

-		return 1;

-	} else

-		return 0;

-}

-#endif

-#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)

-// nothing

-#else

-static int stbi__get16be(stbi__context* s) {

-	int z = stbi__get8(s);

-	return (z << 8) + stbi__get8(s);

-}

-#endif

-#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)

-// nothing

-#else

-static stbi__uint32 stbi__get32be(stbi__context* s) {

-	stbi__uint32 z = stbi__get16be(s);

-	return (z << 16) + stbi__get16be(s);

-}

-#endif

-#if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)

-// nothing

-#else

-static int stbi__get16le(stbi__context* s) {

-	int z = stbi__get8(s);

-	return z + (stbi__get8(s) << 8);

-}

-#endif

-#ifndef STBI_NO_BMP

-static stbi__uint32 stbi__get32le(stbi__context* s) {

-	stbi__uint32 z = stbi__get16le(s);

-	return z + (stbi__get16le(s) << 16);

-}

-#endif

-#define STBI__BYTECAST(x) ((stbi_uc)((x)&255)) // truncate int to byte without warnings

-#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) &&           \

-	defined(STBI_NO_PIC) && defined(STBI_NO_PNM)

-// nothing

-#else

-//////////////////////////////////////////////////////////////////////////////

-//

-//  generic converter from built-in img_n to req_comp

-//    individual types do this automatically as much as possible (e.g. jpeg

-//    does all cases internally since it needs to colorspace convert anyway,

-//    and it never has alpha, so very few cases ). png can automatically

-//    interleave an alpha=255 channel, but falls back to this for other cases

-//

-//  assume data buffer is malloced, so malloc a new one and free that one

-//  only failure mode is malloc failing

-static stbi_uc stbi__compute_y(int r, int g, int b) { return (stbi_uc)(((r * 77) + (g * 150) + (29 * b)) >> 8); }

-#endif

-#if defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) &&            \

-	defined(STBI_NO_PNM)

-// nothing

-#else

-static unsigned char* stbi__convert_format(unsigned char* data, int img_n, int req_comp, unsigned int x, unsigned int y) {

-	int i, j;

-	unsigned char* good;

-	if (req_comp == img_n)

-		return data;

-	STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

-	good = (unsigned char*)stbi__malloc_mad3(req_comp, x, y, 0);

-	if (good == NULL) {

-		STBI_FREE(data);

-		return stbi__errpuc("outofmem", "Out of memory");

-	}

-	for (j = 0; j < (int)y; ++j) {

-		unsigned char* src = data + j * x * img_n;

-		unsigned char* dest = good + j * x * req_comp;

-#define STBI__COMBO(a, b) ((a)*8 + (b))

-#define STBI__CASE(a, b)                                                                                                                                       \

-	case STBI__COMBO(a, b):                                                                                                                                    \

-		for (i = x - 1; i >= 0; --i, src += a, dest += b)

-		// convert source image with img_n components to one with req_comp

-		// components; avoid switch per pixel, so use switch per scanline and

-		// massive macros

-		switch (STBI__COMBO(img_n, req_comp)) {

-			STBI__CASE(1, 2) {

-				dest[0] = src[0];

-				dest[1] = 255;

-			}

-			break;

-			STBI__CASE(1, 3) { dest[0] = dest[1] = dest[2] = src[0]; }

-			break;

-			STBI__CASE(1, 4) {

-				dest[0] = dest[1] = dest[2] = src[0];

-				dest[3] = 255;

-			}

-			break;

-			STBI__CASE(2, 1) { dest[0] = src[0]; }

-			break;

-			STBI__CASE(2, 3) { dest[0] = dest[1] = dest[2] = src[0]; }

-			break;

-			STBI__CASE(2, 4) {

-				dest[0] = dest[1] = dest[2] = src[0];

-				dest[3] = src[1];

-			}

-			break;

-			STBI__CASE(3, 4) {

-				dest[0] = src[0];

-				dest[1] = src[1];

-				dest[2] = src[2];

-				dest[3] = 255;

-			}

-			break;

-			STBI__CASE(3, 1) { dest[0] = stbi__compute_y(src[0], src[1], src[2]); }

-			break;

-			STBI__CASE(3, 2) {

-				dest[0] = stbi__compute_y(src[0], src[1], src[2]);

-				dest[1] = 255;

-			}

-			break;

-			STBI__CASE(4, 1) { dest[0] = stbi__compute_y(src[0], src[1], src[2]); }

-			break;

-			STBI__CASE(4, 2) {

-				dest[0] = stbi__compute_y(src[0], src[1], src[2]);

-				dest[1] = src[3];

-			}

-			break;

-			STBI__CASE(4, 3) {

-				dest[0] = src[0];

-				dest[1] = src[1];

-				dest[2] = src[2];

-			}

-			break;

-		default:

-			STBI_ASSERT(0);

-		}

-#undef STBI__CASE

-	}

-	STBI_FREE(data);

-	return good;

-}

-#endif

-#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)

-// nothing

-#else

-static stbi__uint16 stbi__compute_y_16(int r, int g, int b) { return (stbi__uint16)(((r * 77) + (g * 150) + (29 * b)) >> 8); }

-#endif

-#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)

-// nothing

-#else

-static stbi__uint16* stbi__convert_format16(stbi__uint16* data, int img_n, int req_comp, unsigned int x, unsigned int y) {

-	int i, j;

-	stbi__uint16* good;

-	if (req_comp == img_n)

-		return data;

-	STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

-	good = (stbi__uint16*)stbi__malloc(req_comp * x * y * 2);

-	if (good == NULL) {

-		STBI_FREE(data);

-		return (stbi__uint16*)stbi__errpuc("outofmem", "Out of memory");

-	}

-	for (j = 0; j < (int)y; ++j) {

-		stbi__uint16* src = data + j * x * img_n;

-		stbi__uint16* dest = good + j * x * req_comp;

-#define STBI__COMBO(a, b) ((a)*8 + (b))

-#define STBI__CASE(a, b)                                                                                                                                       \

-	case STBI__COMBO(a, b):                                                                                                                                    \

-		for (i = x - 1; i >= 0; --i, src += a, dest += b)

-		// convert source image with img_n components to one with req_comp

-		// components; avoid switch per pixel, so use switch per scanline and

-		// massive macros

-		switch (STBI__COMBO(img_n, req_comp)) {

-			STBI__CASE(1, 2) {

-				dest[0] = src[0];

-				dest[1] = 0xffff;

-			}

-			break;

-			STBI__CASE(1, 3) { dest[0] = dest[1] = dest[2] = src[0]; }

-			break;

-			STBI__CASE(1, 4) {

-				dest[0] = dest[1] = dest[2] = src[0];

-				dest[3] = 0xffff;

-			}

-			break;

-			STBI__CASE(2, 1) { dest[0] = src[0]; }

-			break;

-			STBI__CASE(2, 3) { dest[0] = dest[1] = dest[2] = src[0]; }

-			break;

-			STBI__CASE(2, 4) {

-				dest[0] = dest[1] = dest[2] = src[0];

-				dest[3] = src[1];

-			}

-			break;

-			STBI__CASE(3, 4) {

-				dest[0] = src[0];

-				dest[1] = src[1];

-				dest[2] = src[2];

-				dest[3] = 0xffff;

-			}

-			break;

-			STBI__CASE(3, 1) { dest[0] = stbi__compute_y_16(src[0], src[1], src[2]); }

-			break;

-			STBI__CASE(3, 2) {

-				dest[0] = stbi__compute_y_16(src[0], src[1], src[2]);

-				dest[1] = 0xffff;

-			}

-			break;

-			STBI__CASE(4, 1) { dest[0] = stbi__compute_y_16(src[0], src[1], src[2]); }

-			break;

-			STBI__CASE(4, 2) {

-				dest[0] = stbi__compute_y_16(src[0], src[1], src[2]);

-				dest[1] = src[3];

-			}

-			break;

-			STBI__CASE(4, 3) {

-				dest[0] = src[0];

-				dest[1] = src[1];

-				dest[2] = src[2];

-			}

-			break;

-		default:

-			STBI_ASSERT(0);

-		}

-#undef STBI__CASE

-	}

-	STBI_FREE(data);

-	return good;

-}

-#endif

-#ifndef STBI_NO_LINEAR

-static float* stbi__ldr_to_hdr(stbi_uc* data, int x, int y, int comp) {

-	int i, k, n;

-	float* output;

-	if (!data)

-		return NULL;

-	output = (float*)stbi__malloc_mad4(x, y, comp, sizeof(float), 0);

-	if (output == NULL) {

-		STBI_FREE(data);

-		return stbi__errpf("outofmem", "Out of memory");

-	}

-	// compute number of non-alpha components

-	if (comp & 1)

-		n = comp;

-	else

-		n = comp - 1;

-	for (i = 0; i < x * y; ++i) {

-		for (k = 0; k < n; ++k) {

-			output[i * comp + k] = (float)(pow(data[i * comp + k] / 255.0f, stbi__l2h_gamma) * stbi__l2h_scale);

-		}

-	}

-	if (n < comp) {

-		for (i = 0; i < x * y; ++i) {

-			output[i * comp + n] = data[i * comp + n] / 255.0f;

-		}

-	}

-	STBI_FREE(data);

-	return output;

-}

-#endif

-#ifndef STBI_NO_HDR

-#define stbi__float2int(x) ((int)(x))

-static stbi_uc* stbi__hdr_to_ldr(float* data, int x, int y, int comp) {

-	int i, k, n;

-	stbi_uc* output;

-	if (!data)

-		return NULL;

-	output = (stbi_uc*)stbi__malloc_mad3(x, y, comp, 0);

-	if (output == NULL) {

-		STBI_FREE(data);

-		return stbi__errpuc("outofmem", "Out of memory");

-	}

-	// compute number of non-alpha components

-	if (comp & 1)

-		n = comp;

-	else

-		n = comp - 1;

-	for (i = 0; i < x * y; ++i) {

-		for (k = 0; k < n; ++k) {

-			float z = (float)pow(data[i * comp + k] * stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;

-			if (z < 0)

-				z = 0;

-			if (z > 255)

-				z = 255;

-			output[i * comp + k] = (stbi_uc)stbi__float2int(z);

-		}

-		if (k < comp) {

-			float z = data[i * comp + k] * 255 + 0.5f;

-			if (z < 0)

-				z = 0;

-			if (z > 255)

-				z = 255;

-			output[i * comp + k] = (stbi_uc)stbi__float2int(z);

-		}

-	}

-	STBI_FREE(data);

-	return output;

-}

-#endif

-//////////////////////////////////////////////////////////////////////////////

-//

-//  "baseline" JPEG/JFIF decoder

-//

-//    simple implementation

-//      - doesn't support delayed output of y-dimension

-//      - simple interface (only one output format: 8-bit interleaved RGB)

-//      - doesn't try to recover corrupt jpegs

-//      - doesn't allow partial loading, loading multiple at once

-//      - still fast on x86 (copying globals into locals doesn't help x86)

-//      - allocates lots of intermediate memory (full size of all components)

-//        - non-interleaved case requires this anyway

-//        - allows good upsampling (see next)

-//    high-quality

-//      - upsampled channels are bilinearly interpolated, even across blocks

-//      - quality integer IDCT derived from IJG's 'slow'

-//    performance

-//      - fast huffman; reasonable integer IDCT

-//      - some SIMD kernels for common paths on targets with SSE2/NEON

-//      - uses a lot of intermediate memory, could cache poorly

-#ifndef STBI_NO_JPEG

-// huffman decoding acceleration

-#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache

-typedef struct {

-	stbi_uc fast[1 << FAST_BITS];

-	// weirdly, repacking this into AoS is a 10% speed loss, instead of a win

-	stbi__uint16 code[256];

-	stbi_uc values[256];

-	stbi_uc size[257];

-	unsigned int maxcode[18];

-	int delta[17]; // old 'firstsymbol' - old 'firstcode'

-} stbi__huffman;

-typedef struct {

-	stbi__context* s;

-	stbi__huffman huff_dc[4];

-	stbi__huffman huff_ac[4];

-	stbi__uint16 dequant[4][64];

-	stbi__int16 fast_ac[4][1 << FAST_BITS];

-	// sizes for components, interleaved MCUs

-	int img_h_max, img_v_max;

-	int img_mcu_x, img_mcu_y;

-	int img_mcu_w, img_mcu_h;

-	// definition of jpeg image component

-	struct {

-		int id;

-		int h, v;

-		int tq;

-		int hd, ha;

-		int dc_pred;

-		int x, y, w2, h2;

-		stbi_uc* data;

-		void *raw_data, *raw_coeff;

-		stbi_uc* linebuf;

-		short* coeff;		  // progressive only

-		int coeff_w, coeff_h; // number of 8x8 coefficient blocks

-	} img_comp[4];

-	stbi__uint32 code_buffer; // jpeg entropy-coded buffer

-	int code_bits;			  // number of valid bits

-	unsigned char marker;	 // marker seen while filling entropy buffer

-	int nomore;				  // flag if we saw a marker so must stop

-	int progressive;

-	int spec_start;

-	int spec_end;

-	int succ_high;

-	int succ_low;

-	int eob_run;

-	int jfif;

-	int app14_color_transform; // Adobe APP14 tag

-	int rgb;

-	int scan_n, order[4];

-	int restart_interval, todo;

-	// kernels

-	void (*idct_block_kernel)(stbi_uc* out, int out_stride, short data[64]);

-	void (*YCbCr_to_RGB_kernel)(stbi_uc* out, const stbi_uc* y, const stbi_uc* pcb, const stbi_uc* pcr, int count, int step);

-	stbi_uc* (*resample_row_hv_2_kernel)(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs);

-} stbi__jpeg;

-static int stbi__build_huffman(stbi__huffman* h, int* count) {

-	int i, j, k = 0;

-	unsigned int code;

-	// build size list for each symbol (from JPEG spec)

-	for (i = 0; i < 16; ++i)

-		for (j = 0; j < count[i]; ++j)

-			h->size[k++] = (stbi_uc)(i + 1);

-	h->size[k] = 0;

-	// compute actual symbols (from jpeg spec)

-	code = 0;

-	k = 0;

-	for (j = 1; j <= 16; ++j) {

-		// compute delta to add to code to compute symbol id

-		h->delta[j] = k - code;

-		if (h->size[k] == j) {

-			while (h->size[k] == j)

-				h->code[k++] = (stbi__uint16)(code++);

-			if (code - 1 >= (1u << j))

-				return stbi__err("bad code lengths", "Corrupt JPEG");

-		}

-		// compute largest code + 1 for this size, preshifted as needed later

-		h->maxcode[j] = code << (16 - j);

-		code <<= 1;

-	}

-	h->maxcode[j] = 0xffffffff;

-	// build non-spec acceleration table; 255 is flag for not-accelerated

-	memset(h->fast, 255, 1 << FAST_BITS);

-	for (i = 0; i < k; ++i) {

-		int s = h->size[i];

-		if (s <= FAST_BITS) {

-			int c = h->code[i] << (FAST_BITS - s);

-			int m = 1 << (FAST_BITS - s);

-			for (j = 0; j < m; ++j) {

-				h->fast[c + j] = (stbi_uc)i;

-			}

-		}

-	}

-	return 1;

-}

-// build a table that decodes both magnitude and value of small ACs in

-// one go.

-static void stbi__build_fast_ac(stbi__int16* fast_ac, stbi__huffman* h) {

-	int i;

-	for (i = 0; i < (1 << FAST_BITS); ++i) {

-		stbi_uc fast = h->fast[i];

-		fast_ac[i] = 0;

-		if (fast < 255) {

-			int rs = h->values[fast];

-			int run = (rs >> 4) & 15;

-			int magbits = rs & 15;

-			int len = h->size[fast];

-			if (magbits && len + magbits <= FAST_BITS) {

-				// magnitude code followed by receive_extend code

-				int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);

-				int m = 1 << (magbits - 1);

-				if (k < m)

-					k += (~0U << magbits) + 1;

-				// if the result is small enough, we can fit it in fast_ac table

-				if (k >= -128 && k <= 127)

-					fast_ac[i] = (stbi__int16)((k * 256) + (run * 16) + (len + magbits));

-			}

-		}

-	}

-}

-static void stbi__grow_buffer_unsafe(stbi__jpeg* j) {

-	do {

-		unsigned int b = j->nomore ? 0 : stbi__get8(j->s);

-		if (b == 0xff) {

-			int c = stbi__get8(j->s);

-			while (c == 0xff)

-				c = stbi__get8(j->s); // consume fill bytes

-			if (c != 0) {

-				j->marker = (unsigned char)c;

-				j->nomore = 1;

-				return;

-			}

-		}

-		j->code_buffer |= b << (24 - j->code_bits);

-		j->code_bits += 8;

-	} while (j->code_bits <= 24);

-}

-// (1 << n) - 1

-static const stbi__uint32 stbi__bmask[17] = {0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767, 65535};

-// decode a jpeg huffman value from the bitstream

-stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg* j, stbi__huffman* h) {

-	unsigned int temp;

-	int c, k;

-	if (j->code_bits < 16)

-		stbi__grow_buffer_unsafe(j);

-	// look at the top FAST_BITS and determine what symbol ID it is,

-	// if the code is <= FAST_BITS

-	c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);

-	k = h->fast[c];

-	if (k < 255) {

-		int s = h->size[k];

-		if (s > j->code_bits)

-			return -1;

-		j->code_buffer <<= s;

-		j->code_bits -= s;

-		return h->values[k];

-	}

-	// naive test is to shift the code_buffer down so k bits are

-	// valid, then test against maxcode. To speed this up, we've

-	// preshifted maxcode left so that it has (16-k) 0s at the

-	// end; in other words, regardless of the number of bits, it

-	// wants to be compared against something shifted to have 16;

-	// that way we don't need to shift inside the loop.

-	temp = j->code_buffer >> 16;

-	for (k = FAST_BITS + 1;; ++k)

-		if (temp < h->maxcode[k])

-			break;

-	if (k == 17) {

-		// error! code not found

-		j->code_bits -= 16;

-		return -1;

-	}

-	if (k > j->code_bits)

-		return -1;

-	// convert the huffman code to the symbol id

-	c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];

-	STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);

-	// convert the id to a symbol

-	j->code_bits -= k;

-	j->code_buffer <<= k;

-	return h->values[c];

-}

-// bias[n] = (-1<<n) + 1

-static const int stbi__jbias[16] = {0, -1, -3, -7, -15, -31, -63, -127, -255, -511, -1023, -2047, -4095, -8191, -16383, -32767};

-// combined JPEG 'receive' and JPEG 'extend', since baseline

-// always extends everything it receives.

-stbi_inline static int stbi__extend_receive(stbi__jpeg* j, int n) {

-	unsigned int k;

-	int sgn;

-	if (j->code_bits < n)

-		stbi__grow_buffer_unsafe(j);

-	sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB

-	k = stbi_lrot(j->code_buffer, n);

-	STBI_ASSERT(n >= 0 && n < (int)(sizeof(stbi__bmask) / sizeof(*stbi__bmask)));

-	j->code_buffer = k & ~stbi__bmask[n];

-	k &= stbi__bmask[n];

-	j->code_bits -= n;

-	return k + (stbi__jbias[n] & ~sgn);

-}

-// get some unsigned bits

-stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg* j, int n) {

-	unsigned int k;

-	if (j->code_bits < n)

-		stbi__grow_buffer_unsafe(j);

-	k = stbi_lrot(j->code_buffer, n);

-	j->code_buffer = k & ~stbi__bmask[n];

-	k &= stbi__bmask[n];

-	j->code_bits -= n;

-	return k;

-}

-stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg* j) {

-	unsigned int k;

-	if (j->code_bits < 1)

-		stbi__grow_buffer_unsafe(j);

-	k = j->code_buffer;

-	j->code_buffer <<= 1;

-	--j->code_bits;

-	return k & 0x80000000;

-}

-// given a value that's at position X in the zigzag stream,

-// where does it appear in the 8x8 matrix coded as row-major?

-static const stbi_uc stbi__jpeg_dezigzag[64 + 15] = {0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7,

-													 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46,

-													 53, 60, 61, 54, 47, 55, 62, 63,

-													 // let corrupt input sample past end

-													 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63};

-// decode one 64-entry block--

-static int stbi__jpeg_decode_block(stbi__jpeg* j, short data[64], stbi__huffman* hdc, stbi__huffman* hac, stbi__int16* fac, int b, stbi__uint16* dequant) {

-	int diff, dc, k;

-	int t;

-	if (j->code_bits < 16)

-		stbi__grow_buffer_unsafe(j);

-	t = stbi__jpeg_huff_decode(j, hdc);

-	if (t < 0)

-		return stbi__err("bad huffman code", "Corrupt JPEG");

-	// 0 all the ac values now so we can do it 32-bits at a time

-	memset(data, 0, 64 * sizeof(data[0]));

-	diff = t ? stbi__extend_receive(j, t) : 0;

-	dc = j->img_comp[b].dc_pred + diff;

-	j->img_comp[b].dc_pred = dc;

-	data[0] = (short)(dc * dequant[0]);

-	// decode AC components, see JPEG spec

-	k = 1;

-	do {

-		unsigned int zig;

-		int c, r, s;

-		if (j->code_bits < 16)

-			stbi__grow_buffer_unsafe(j);

-		c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);

-		r = fac[c];

-		if (r) {				// fast-AC path

-			k += (r >> 4) & 15; // run

-			s = r & 15;			// combined length

-			j->code_buffer <<= s;

-			j->code_bits -= s;

-			// decode into unzigzag'd location

-			zig = stbi__jpeg_dezigzag[k++];

-			data[zig] = (short)((r >> 8) * dequant[zig]);

-		} else {

-			int rs = stbi__jpeg_huff_decode(j, hac);

-			if (rs < 0)

-				return stbi__err("bad huffman code", "Corrupt JPEG");

-			s = rs & 15;

-			r = rs >> 4;

-			if (s == 0) {

-				if (rs != 0xf0)

-					break; // end block

-				k += 16;

-			} else {

-				k += r;

-				// decode into unzigzag'd location

-				zig = stbi__jpeg_dezigzag[k++];

-				data[zig] = (short)(stbi__extend_receive(j, s) * dequant[zig]);

-			}

-		}

-	} while (k < 64);

-	return 1;

-}

-static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg* j, short data[64], stbi__huffman* hdc, int b) {

-	int diff, dc;

-	int t;

-	if (j->spec_end != 0)

-		return stbi__err("can't merge dc and ac", "Corrupt JPEG");

-	if (j->code_bits < 16)

-		stbi__grow_buffer_unsafe(j);

-	if (j->succ_high == 0) {

-		// first scan for DC coefficient, must be first

-		memset(data, 0, 64 * sizeof(data[0])); // 0 all the ac values now

-		t = stbi__jpeg_huff_decode(j, hdc);

-		diff = t ? stbi__extend_receive(j, t) : 0;

-		dc = j->img_comp[b].dc_pred + diff;

-		j->img_comp[b].dc_pred = dc;

-		data[0] = (short)(dc << j->succ_low);

-	} else {

-		// refinement scan for DC coefficient

-		if (stbi__jpeg_get_bit(j))

-			data[0] += (short)(1 << j->succ_low);

-	}

-	return 1;

-}

-// @OPTIMIZE: store non-zigzagged during the decode passes,

-// and only de-zigzag when dequantizing

-static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg* j, short data[64], stbi__huffman* hac, stbi__int16* fac) {

-	int k;

-	if (j->spec_start == 0)

-		return stbi__err("can't merge dc and ac", "Corrupt JPEG");

-	if (j->succ_high == 0) {

-		int shift = j->succ_low;

-		if (j->eob_run) {

-			--j->eob_run;

-			return 1;

-		}

-		k = j->spec_start;

-		do {

-			unsigned int zig;

-			int c, r, s;

-			if (j->code_bits < 16)

-				stbi__grow_buffer_unsafe(j);

-			c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);

-			r = fac[c];

-			if (r) {				// fast-AC path

-				k += (r >> 4) & 15; // run

-				s = r & 15;			// combined length

-				j->code_buffer <<= s;

-				j->code_bits -= s;

-				zig = stbi__jpeg_dezigzag[k++];

-				data[zig] = (short)((r >> 8) << shift);

-			} else {

-				int rs = stbi__jpeg_huff_decode(j, hac);

-				if (rs < 0)

-					return stbi__err("bad huffman code", "Corrupt JPEG");

-				s = rs & 15;

-				r = rs >> 4;

-				if (s == 0) {

-					if (r < 15) {

-						j->eob_run = (1 << r);

-						if (r)

-							j->eob_run += stbi__jpeg_get_bits(j, r);

-						--j->eob_run;

-						break;

-					}

-					k += 16;

-				} else {

-					k += r;

-					zig = stbi__jpeg_dezigzag[k++];

-					data[zig] = (short)(stbi__extend_receive(j, s) << shift);

-				}

-			}

-		} while (k <= j->spec_end);

-	} else {

-		// refinement scan for these AC coefficients

-		short bit = (short)(1 << j->succ_low);

-		if (j->eob_run) {

-			--j->eob_run;

-			for (k = j->spec_start; k <= j->spec_end; ++k) {

-				short* p = &data[stbi__jpeg_dezigzag[k]];

-				if (*p != 0)

-					if (stbi__jpeg_get_bit(j))

-						if ((*p & bit) == 0) {

-							if (*p > 0)

-								*p += bit;

-							else

-								*p -= bit;

-						}

-			}

-		} else {

-			k = j->spec_start;

-			do {

-				int r, s;

-				int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here,

-														 // advance-by-r is so slow, eh

-				if (rs < 0)

-					return stbi__err("bad huffman code", "Corrupt JPEG");

-				s = rs & 15;

-				r = rs >> 4;

-				if (s == 0) {

-					if (r < 15) {

-						j->eob_run = (1 << r) - 1;

-						if (r)

-							j->eob_run += stbi__jpeg_get_bits(j, r);

-						r = 64; // force end of block

-					} else {

-						// r=15 s=0 should write 16 0s, so we just do

-						// a run of 15 0s and then write s (which is 0),

-						// so we don't have to do anything special here

-					}

-				} else {

-					if (s != 1)

-						return stbi__err("bad huffman code", "Corrupt JPEG");

-					// sign bit

-					if (stbi__jpeg_get_bit(j))

-						s = bit;

-					else

-						s = -bit;

-				}

-				// advance by r

-				while (k <= j->spec_end) {

-					short* p = &data[stbi__jpeg_dezigzag[k++]];

-					if (*p != 0) {

-						if (stbi__jpeg_get_bit(j))

-							if ((*p & bit) == 0) {

-								if (*p > 0)

-									*p += bit;

-								else

-									*p -= bit;

-							}

-					} else {

-						if (r == 0) {

-							*p = (short)s;

-							break;

-						}

-						--r;

-					}

-				}

-			} while (k <= j->spec_end);

-		}

-	}

-	return 1;

-}

-// take a -128..127 value and stbi__clamp it and convert to 0..255

-stbi_inline static stbi_uc stbi__clamp(int x) {

-	// trick to use a single test to catch both cases

-	if ((unsigned int)x > 255) {

-		if (x < 0)

-			return 0;

-		if (x > 255)

-			return 255;

-	}

-	return (stbi_uc)x;

-}

-#define stbi__f2f(x) ((int)(((x)*4096 + 0.5)))

-#define stbi__fsh(x) ((x)*4096)

-// derived from jidctint -- DCT_ISLOW

-#define STBI__IDCT_1D(s0, s1, s2, s3, s4, s5, s6, s7)                                                                                                          \

-	int t0, t1, t2, t3, p1, p2, p3, p4, p5, x0, x1, x2, x3;                                                                                                    \

-	p2 = s2;                                                                                                                                                   \

-	p3 = s6;                                                                                                                                                   \

-	p1 = (p2 + p3) * stbi__f2f(0.5411961f);                                                                                                                    \

-	t2 = p1 + p3 * stbi__f2f(-1.847759065f);                                                                                                                   \

-	t3 = p1 + p2 * stbi__f2f(0.765366865f);                                                                                                                    \

-	p2 = s0;                                                                                                                                                   \

-	p3 = s4;                                                                                                                                                   \

-	t0 = stbi__fsh(p2 + p3);                                                                                                                                   \

-	t1 = stbi__fsh(p2 - p3);                                                                                                                                   \

-	x0 = t0 + t3;                                                                                                                                              \

-	x3 = t0 - t3;                                                                                                                                              \

-	x1 = t1 + t2;                                                                                                                                              \

-	x2 = t1 - t2;                                                                                                                                              \

-	t0 = s7;                                                                                                                                                   \

-	t1 = s5;                                                                                                                                                   \

-	t2 = s3;                                                                                                                                                   \

-	t3 = s1;                                                                                                                                                   \

-	p3 = t0 + t2;                                                                                                                                              \

-	p4 = t1 + t3;                                                                                                                                              \

-	p1 = t0 + t3;                                                                                                                                              \

-	p2 = t1 + t2;                                                                                                                                              \

-	p5 = (p3 + p4) * stbi__f2f(1.175875602f);                                                                                                                  \

-	t0 = t0 * stbi__f2f(0.298631336f);                                                                                                                         \

-	t1 = t1 * stbi__f2f(2.053119869f);                                                                                                                         \

-	t2 = t2 * stbi__f2f(3.072711026f);                                                                                                                         \

-	t3 = t3 * stbi__f2f(1.501321110f);                                                                                                                         \

-	p1 = p5 + p1 * stbi__f2f(-0.899976223f);                                                                                                                   \

-	p2 = p5 + p2 * stbi__f2f(-2.562915447f);                                                                                                                   \

-	p3 = p3 * stbi__f2f(-1.961570560f);                                                                                                                        \

-	p4 = p4 * stbi__f2f(-0.390180644f);                                                                                                                        \

-	t3 += p1 + p4;                                                                                                                                             \

-	t2 += p2 + p3;                                                                                                                                             \

-	t1 += p2 + p4;                                                                                                                                             \

-	t0 += p1 + p3;

-static void stbi__idct_block(stbi_uc* out, int out_stride, short data[64]) {

-	int i, val[64], *v = val;

-	stbi_uc* o;

-	short* d = data;

-	// columns

-	for (i = 0; i < 8; ++i, ++d, ++v) {

-		// if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing

-		if (d[8] == 0 && d[16] == 0 && d[24] == 0 && d[32] == 0 && d[40] == 0 && d[48] == 0 && d[56] == 0) {

-			//    no shortcut                 0     seconds

-			//    (1|2|3|4|5|6|7)==0          0     seconds

-			//    all separate               -0.047 seconds

-			//    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds

-			int dcterm = d[0] * 4;

-			v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;

-		} else {

-			STBI__IDCT_1D(d[0], d[8], d[16], d[24], d[32], d[40], d[48], d[56])

-			// constants scaled things up by 1<<12; let's bring them back

-			// down, but keep 2 extra bits of precision

-			x0 += 512;

-			x1 += 512;

-			x2 += 512;

-			x3 += 512;

-			v[0] = (x0 + t3) >> 10;

-			v[56] = (x0 - t3) >> 10;

-			v[8] = (x1 + t2) >> 10;

-			v[48] = (x1 - t2) >> 10;

-			v[16] = (x2 + t1) >> 10;

-			v[40] = (x2 - t1) >> 10;

-			v[24] = (x3 + t0) >> 10;

-			v[32] = (x3 - t0) >> 10;

-		}

-	}

-	for (i = 0, v = val, o = out; i < 8; ++i, v += 8, o += out_stride) {

-		// no fast case since the first 1D IDCT spread components out

-		STBI__IDCT_1D(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7])

-		// constants scaled things up by 1<<12, plus we had 1<<2 from first

-		// loop, plus horizontal and vertical each scale by sqrt(8) so together

-		// we've got an extra 1<<3, so 1<<17 total we need to remove.

-		// so we want to round that, which means adding 0.5 * 1<<17,

-		// aka 65536. Also, we'll end up with -128 to 127 that we want

-		// to encode as 0..255 by adding 128, so we'll add that before the shift

-		x0 += 65536 + (128 << 17);

-		x1 += 65536 + (128 << 17);

-		x2 += 65536 + (128 << 17);

-		x3 += 65536 + (128 << 17);

-		// tried computing the shifts into temps, or'ing the temps to see

-		// if any were out of range, but that was slower

-		o[0] = stbi__clamp((x0 + t3) >> 17);

-		o[7] = stbi__clamp((x0 - t3) >> 17);

-		o[1] = stbi__clamp((x1 + t2) >> 17);

-		o[6] = stbi__clamp((x1 - t2) >> 17);

-		o[2] = stbi__clamp((x2 + t1) >> 17);

-		o[5] = stbi__clamp((x2 - t1) >> 17);

-		o[3] = stbi__clamp((x3 + t0) >> 17);

-		o[4] = stbi__clamp((x3 - t0) >> 17);

-	}

-}

-#ifdef STBI_SSE2

-// sse2 integer IDCT. not the fastest possible implementation but it

-// produces bit-identical results to the generic C version so it's

-// fully "transparent".

-static void stbi__idct_simd(stbi_uc* out, int out_stride, short data[64]) {

-	// This is constructed to match our regular (generic) integer IDCT exactly.

-	__m128i row0, row1, row2, row3, row4, row5, row6, row7;

-	__m128i tmp;

-// dot product constant: even elems=x, odd elems=y

-#define dct_const(x, y) _mm_setr_epi16((x), (y), (x), (y), (x), (y), (x), (y))

-// out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)

-// out(1) = c1[even]*x + c1[odd]*y

-#define dct_rot(out0, out1, x, y, c0, c1)                                                                                                                      \

-	__m128i c0##lo = _mm_unpacklo_epi16((x), (y));                                                                                                             \

-	__m128i c0##hi = _mm_unpackhi_epi16((x), (y));                                                                                                             \

-	__m128i out0##_l = _mm_madd_epi16(c0##lo, c0);                                                                                                             \

-	__m128i out0##_h = _mm_madd_epi16(c0##hi, c0);                                                                                                             \

-	__m128i out1##_l = _mm_madd_epi16(c0##lo, c1);                                                                                                             \

-	__m128i out1##_h = _mm_madd_epi16(c0##hi, c1)

-// out = in << 12  (in 16-bit, out 32-bit)

-#define dct_widen(out, in)                                                                                                                                     \

-	__m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4);                                                                        \

-	__m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)

-// wide add

-#define dct_wadd(out, a, b)                                                                                                                                    \

-	__m128i out##_l = _mm_add_epi32(a##_l, b##_l);                                                                                                             \

-	__m128i out##_h = _mm_add_epi32(a##_h, b##_h)

-// wide sub

-#define dct_wsub(out, a, b)                                                                                                                                    \

-	__m128i out##_l = _mm_sub_epi32(a##_l, b##_l);                                                                                                             \

-	__m128i out##_h = _mm_sub_epi32(a##_h, b##_h)

-// butterfly a/b, add bias, then shift by "s" and pack

-#define dct_bfly32o(out0, out1, a, b, bias, s)                                                                                                                 \

-	{                                                                                                                                                          \

-		__m128i abiased_l = _mm_add_epi32(a##_l, bias);                                                                                                        \

-		__m128i abiased_h = _mm_add_epi32(a##_h, bias);                                                                                                        \

-		dct_wadd(sum, abiased, b);                                                                                                                             \

-		dct_wsub(dif, abiased, b);                                                                                                                             \

-		out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s));                                                                            \

-		out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s));                                                                            \

-	}

-// 8-bit interleave step (for transposes)

-#define dct_interleave8(a, b)                                                                                                                                  \

-	tmp = a;                                                                                                                                                   \

-	a = _mm_unpacklo_epi8(a, b);                                                                                                                               \

-	b = _mm_unpackhi_epi8(tmp, b)

-// 16-bit interleave step (for transposes)

-#define dct_interleave16(a, b)                                                                                                                                 \

-	tmp = a;                                                                                                                                                   \

-	a = _mm_unpacklo_epi16(a, b);                                                                                                                              \

-	b = _mm_unpackhi_epi16(tmp, b)

-#define dct_pass(bias, shift)                                                                                                                                  \

-	{                                                                                                                                                          \

-		/* even part */                                                                                                                                        \

-		dct_rot(t2e, t3e, row2, row6, rot0_0, rot0_1);                                                                                                         \

-		__m128i sum04 = _mm_add_epi16(row0, row4);                                                                                                             \

-		__m128i dif04 = _mm_sub_epi16(row0, row4);                                                                                                             \

-		dct_widen(t0e, sum04);                                                                                                                                 \

-		dct_widen(t1e, dif04);                                                                                                                                 \

-		dct_wadd(x0, t0e, t3e);                                                                                                                                \

-		dct_wsub(x3, t0e, t3e);                                                                                                                                \

-		dct_wadd(x1, t1e, t2e);                                                                                                                                \

-		dct_wsub(x2, t1e, t2e);                                                                                                                                \

-		/* odd part */                                                                                                                                         \

-		dct_rot(y0o, y2o, row7, row3, rot2_0, rot2_1);                                                                                                         \

-		dct_rot(y1o, y3o, row5, row1, rot3_0, rot3_1);                                                                                                         \

-		__m128i sum17 = _mm_add_epi16(row1, row7);                                                                                                             \

-		__m128i sum35 = _mm_add_epi16(row3, row5);                                                                                                             \

-		dct_rot(y4o, y5o, sum17, sum35, rot1_0, rot1_1);                                                                                                       \

-		dct_wadd(x4, y0o, y4o);                                                                                                                                \

-		dct_wadd(x5, y1o, y5o);                                                                                                                                \

-		dct_wadd(x6, y2o, y5o);                                                                                                                                \

-		dct_wadd(x7, y3o, y4o);                                                                                                                                \

-		dct_bfly32o(row0, row7, x0, x7, bias, shift);                                                                                                          \

-		dct_bfly32o(row1, row6, x1, x6, bias, shift);                                                                                                          \

-		dct_bfly32o(row2, row5, x2, x5, bias, shift);                                                                                                          \

-		dct_bfly32o(row3, row4, x3, x4, bias, shift);                                                                                                          \

-	}

-	__m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));

-	__m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f(0.765366865f), stbi__f2f(0.5411961f));

-	__m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));

-	__m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));

-	__m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f(0.298631336f), stbi__f2f(-1.961570560f));

-	__m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f(3.072711026f));

-	__m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f(2.053119869f), stbi__f2f(-0.390180644f));

-	__m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f(1.501321110f));

-	// rounding biases in column/row passes, see stbi__idct_block for

-	// explanation.

-	__m128i bias_0 = _mm_set1_epi32(512);

-	__m128i bias_1 = _mm_set1_epi32(65536 + (128 << 17));

-	// load

-	row0 = _mm_load_si128((const __m128i*)(data + 0 * 8));

-	row1 = _mm_load_si128((const __m128i*)(data + 1 * 8));

-	row2 = _mm_load_si128((const __m128i*)(data + 2 * 8));

-	row3 = _mm_load_si128((const __m128i*)(data + 3 * 8));

-	row4 = _mm_load_si128((const __m128i*)(data + 4 * 8));

-	row5 = _mm_load_si128((const __m128i*)(data + 5 * 8));

-	row6 = _mm_load_si128((const __m128i*)(data + 6 * 8));

-	row7 = _mm_load_si128((const __m128i*)(data + 7 * 8));

-	// column pass

-	dct_pass(bias_0, 10);

-	{

-		// 16bit 8x8 transpose pass 1

-		dct_interleave16(row0, row4);

-		dct_interleave16(row1, row5);

-		dct_interleave16(row2, row6);

-		dct_interleave16(row3, row7);

-		// transpose pass 2

-		dct_interleave16(row0, row2);

-		dct_interleave16(row1, row3);

-		dct_interleave16(row4, row6);

-		dct_interleave16(row5, row7);

-		// transpose pass 3

-		dct_interleave16(row0, row1);

-		dct_interleave16(row2, row3);

-		dct_interleave16(row4, row5);

-		dct_interleave16(row6, row7);

-	}

-	// row pass

-	dct_pass(bias_1, 17);

-	{

-		// pack

-		__m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7

-		__m128i p1 = _mm_packus_epi16(row2, row3);

-		__m128i p2 = _mm_packus_epi16(row4, row5);

-		__m128i p3 = _mm_packus_epi16(row6, row7);

-		// 8bit 8x8 transpose pass 1

-		dct_interleave8(p0, p2); // a0e0a1e1...

-		dct_interleave8(p1, p3); // c0g0c1g1...

-		// transpose pass 2

-		dct_interleave8(p0, p1); // a0c0e0g0...

-		dct_interleave8(p2, p3); // b0d0f0h0...

-		// transpose pass 3

-		dct_interleave8(p0, p2); // a0b0c0d0...

-		dct_interleave8(p1, p3); // a4b4c4d4...

-		// store

-		_mm_storel_epi64((__m128i*)out, p0);

-		out += out_stride;

-		_mm_storel_epi64((__m128i*)out, _mm_shuffle_epi32(p0, 0x4e));

-		out += out_stride;

-		_mm_storel_epi64((__m128i*)out, p2);

-		out += out_stride;

-		_mm_storel_epi64((__m128i*)out, _mm_shuffle_epi32(p2, 0x4e));

-		out += out_stride;

-		_mm_storel_epi64((__m128i*)out, p1);

-		out += out_stride;

-		_mm_storel_epi64((__m128i*)out, _mm_shuffle_epi32(p1, 0x4e));

-		out += out_stride;

-		_mm_storel_epi64((__m128i*)out, p3);

-		out += out_stride;

-		_mm_storel_epi64((__m128i*)out, _mm_shuffle_epi32(p3, 0x4e));

-	}

-#undef dct_const

-#undef dct_rot

-#undef dct_widen

-#undef dct_wadd

-#undef dct_wsub

-#undef dct_bfly32o

-#undef dct_interleave8

-#undef dct_interleave16

-#undef dct_pass

-}

-#endif // STBI_SSE2

-#ifdef STBI_NEON

-// NEON integer IDCT. should produce bit-identical

-// results to the generic C version.

-static void stbi__idct_simd(stbi_uc* out, int out_stride, short data[64]) {

-	int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;

-	int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));

-	int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));

-	int16x4_t rot0_2 = vdup_n_s16(stbi__f2f(0.765366865f));

-	int16x4_t rot1_0 = vdup_n_s16(stbi__f2f(1.175875602f));

-	int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));

-	int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));

-	int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));

-	int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));

-	int16x4_t rot3_0 = vdup_n_s16(stbi__f2f(0.298631336f));

-	int16x4_t rot3_1 = vdup_n_s16(stbi__f2f(2.053119869f));

-	int16x4_t rot3_2 = vdup_n_s16(stbi__f2f(3.072711026f));

-	int16x4_t rot3_3 = vdup_n_s16(stbi__f2f(1.501321110f));

-#define dct_long_mul(out, inq, coeff)                                                                                                                          \

-	int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff);                                                                                                   \

-	int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)

-#define dct_long_mac(out, acc, inq, coeff)                                                                                                                     \

-	int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff);                                                                                          \

-	int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)

-#define dct_widen(out, inq)                                                                                                                                    \

-	int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12);                                                                                                    \

-	int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)

-// wide add

-#define dct_wadd(out, a, b)                                                                                                                                    \

-	int32x4_t out##_l = vaddq_s32(a##_l, b##_l);                                                                                                               \

-	int32x4_t out##_h = vaddq_s32(a##_h, b##_h)

-// wide sub

-#define dct_wsub(out, a, b)                                                                                                                                    \

-	int32x4_t out##_l = vsubq_s32(a##_l, b##_l);                                                                                                               \

-	int32x4_t out##_h = vsubq_s32(a##_h, b##_h)

-// butterfly a/b, then shift using "shiftop" by "s" and pack

-#define dct_bfly32o(out0, out1, a, b, shiftop, s)                                                                                                              \

-	{                                                                                                                                                          \

-		dct_wadd(sum, a, b);                                                                                                                                   \

-		dct_wsub(dif, a, b);                                                                                                                                   \

-		out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s));                                                                                             \

-		out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s));                                                                                             \

-	}

-#define dct_pass(shiftop, shift)                                                                                                                               \

-	{                                                                                                                                                          \

-		/* even part */                                                                                                                                        \

-		int16x8_t sum26 = vaddq_s16(row2, row6);                                                                                                               \

-		dct_long_mul(p1e, sum26, rot0_0);                                                                                                                      \

-		dct_long_mac(t2e, p1e, row6, rot0_1);                                                                                                                  \

-		dct_long_mac(t3e, p1e, row2, rot0_2);                                                                                                                  \

-		int16x8_t sum04 = vaddq_s16(row0, row4);                                                                                                               \

-		int16x8_t dif04 = vsubq_s16(row0, row4);                                                                                                               \

-		dct_widen(t0e, sum04);                                                                                                                                 \

-		dct_widen(t1e, dif04);                                                                                                                                 \

-		dct_wadd(x0, t0e, t3e);                                                                                                                                \

-		dct_wsub(x3, t0e, t3e);                                                                                                                                \

-		dct_wadd(x1, t1e, t2e);                                                                                                                                \

-		dct_wsub(x2, t1e, t2e);                                                                                                                                \

-		/* odd part */                                                                                                                                         \

-		int16x8_t sum15 = vaddq_s16(row1, row5);                                                                                                               \

-		int16x8_t sum17 = vaddq_s16(row1, row7);                                                                                                               \

-		int16x8_t sum35 = vaddq_s16(row3, row5);                                                                                                               \

-		int16x8_t sum37 = vaddq_s16(row3, row7);                                                                                                               \

-		int16x8_t sumodd = vaddq_s16(sum17, sum35);                                                                                                            \

-		dct_long_mul(p5o, sumodd, rot1_0);                                                                                                                     \

-		dct_long_mac(p1o, p5o, sum17, rot1_1);                                                                                                                 \

-		dct_long_mac(p2o, p5o, sum35, rot1_2);                                                                                                                 \

-		dct_long_mul(p3o, sum37, rot2_0);                                                                                                                      \

-		dct_long_mul(p4o, sum15, rot2_1);                                                                                                                      \

-		dct_wadd(sump13o, p1o, p3o);                                                                                                                           \

-		dct_wadd(sump24o, p2o, p4o);                                                                                                                           \

-		dct_wadd(sump23o, p2o, p3o);                                                                                                                           \

-		dct_wadd(sump14o, p1o, p4o);                                                                                                                           \

-		dct_long_mac(x4, sump13o, row7, rot3_0);                                                                                                               \

-		dct_long_mac(x5, sump24o, row5, rot3_1);                                                                                                               \

-		dct_long_mac(x6, sump23o, row3, rot3_2);                                                                                                               \

-		dct_long_mac(x7, sump14o, row1, rot3_3);                                                                                                               \

-		dct_bfly32o(row0, row7, x0, x7, shiftop, shift);                                                                                                       \

-		dct_bfly32o(row1, row6, x1, x6, shiftop, shift);                                                                                                       \

-		dct_bfly32o(row2, row5, x2, x5, shiftop, shift);                                                                                                       \

-		dct_bfly32o(row3, row4, x3, x4, shiftop, shift);                                                                                                       \

-	}

-	// load

-	row0 = vld1q_s16(data + 0 * 8);

-	row1 = vld1q_s16(data + 1 * 8);

-	row2 = vld1q_s16(data + 2 * 8);

-	row3 = vld1q_s16(data + 3 * 8);

-	row4 = vld1q_s16(data + 4 * 8);

-	row5 = vld1q_s16(data + 5 * 8);

-	row6 = vld1q_s16(data + 6 * 8);

-	row7 = vld1q_s16(data + 7 * 8);

-	// add DC bias

-	row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));

-	// column pass

-	dct_pass(vrshrn_n_s32, 10);

-	// 16bit 8x8 transpose

-	{

-// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.

-// whether compilers actually get this is another story, sadly.

-#define dct_trn16(x, y)                                                                                                                                        \

-	{                                                                                                                                                          \

-		int16x8x2_t t = vtrnq_s16(x, y);                                                                                                                       \

-		x = t.val[0];                                                                                                                                          \

-		y = t.val[1];                                                                                                                                          \

-	}

-#define dct_trn32(x, y)                                                                                                                                        \

-	{                                                                                                                                                          \

-		int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y));                                                                         \

-		x = vreinterpretq_s16_s32(t.val[0]);                                                                                                                   \

-		y = vreinterpretq_s16_s32(t.val[1]);                                                                                                                   \

-	}

-#define dct_trn64(x, y)                                                                                                                                        \

-	{                                                                                                                                                          \

-		int16x8_t x0 = x;                                                                                                                                      \

-		int16x8_t y0 = y;                                                                                                                                      \

-		x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0));                                                                                                  \

-		y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0));                                                                                                \

-	}

-		// pass 1

-		dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6

-		dct_trn16(row2, row3);

-		dct_trn16(row4, row5);

-		dct_trn16(row6, row7);

-		// pass 2

-		dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4

-		dct_trn32(row1, row3);

-		dct_trn32(row4, row6);

-		dct_trn32(row5, row7);

-		// pass 3

-		dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0

-		dct_trn64(row1, row5);

-		dct_trn64(row2, row6);

-		dct_trn64(row3, row7);

-#undef dct_trn16

-#undef dct_trn32

-#undef dct_trn64

-	}

-	// row pass

-	// vrshrn_n_s32 only supports shifts up to 16, we need

-	// 17. so do a non-rounding shift of 16 first then follow

-	// up with a rounding shift by 1.

-	dct_pass(vshrn_n_s32, 16);

-	{

-		// pack and round

-		uint8x8_t p0 = vqrshrun_n_s16(row0, 1);

-		uint8x8_t p1 = vqrshrun_n_s16(row1, 1);

-		uint8x8_t p2 = vqrshrun_n_s16(row2, 1);

-		uint8x8_t p3 = vqrshrun_n_s16(row3, 1);

-		uint8x8_t p4 = vqrshrun_n_s16(row4, 1);

-		uint8x8_t p5 = vqrshrun_n_s16(row5, 1);

-		uint8x8_t p6 = vqrshrun_n_s16(row6, 1);

-		uint8x8_t p7 = vqrshrun_n_s16(row7, 1);

-		// again, these can translate into one instruction, but often don't.

-#define dct_trn8_8(x, y)                                                                                                                                       \

-	{                                                                                                                                                          \

-		uint8x8x2_t t = vtrn_u8(x, y);                                                                                                                         \

-		x = t.val[0];                                                                                                                                          \

-		y = t.val[1];                                                                                                                                          \

-	}

-#define dct_trn8_16(x, y)                                                                                                                                      \

-	{                                                                                                                                                          \

-		uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y));                                                                             \

-		x = vreinterpret_u8_u16(t.val[0]);                                                                                                                     \

-		y = vreinterpret_u8_u16(t.val[1]);                                                                                                                     \

-	}

-#define dct_trn8_32(x, y)                                                                                                                                      \

-	{                                                                                                                                                          \

-		uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y));                                                                             \

-		x = vreinterpret_u8_u32(t.val[0]);                                                                                                                     \

-		y = vreinterpret_u8_u32(t.val[1]);                                                                                                                     \

-	}

-		// sadly can't use interleaved stores here since we only write

-		// 8 bytes to each scan line!

-		// 8x8 8-bit transpose pass 1

-		dct_trn8_8(p0, p1);

-		dct_trn8_8(p2, p3);

-		dct_trn8_8(p4, p5);

-		dct_trn8_8(p6, p7);

-		// pass 2

-		dct_trn8_16(p0, p2);

-		dct_trn8_16(p1, p3);

-		dct_trn8_16(p4, p6);

-		dct_trn8_16(p5, p7);

-		// pass 3

-		dct_trn8_32(p0, p4);

-		dct_trn8_32(p1, p5);

-		dct_trn8_32(p2, p6);

-		dct_trn8_32(p3, p7);

-		// store

-		vst1_u8(out, p0);

-		out += out_stride;

-		vst1_u8(out, p1);

-		out += out_stride;

-		vst1_u8(out, p2);

-		out += out_stride;

-		vst1_u8(out, p3);

-		out += out_stride;

-		vst1_u8(out, p4);

-		out += out_stride;

-		vst1_u8(out, p5);

-		out += out_stride;

-		vst1_u8(out, p6);

-		out += out_stride;

-		vst1_u8(out, p7);

-#undef dct_trn8_8

-#undef dct_trn8_16

-#undef dct_trn8_32

-	}

-#undef dct_long_mul

-#undef dct_long_mac

-#undef dct_widen

-#undef dct_wadd

-#undef dct_wsub

-#undef dct_bfly32o

-#undef dct_pass

-}

-#endif // STBI_NEON

-#define STBI__MARKER_none 0xff

-// if there's a pending marker from the entropy stream, return that

-// otherwise, fetch from the stream and get a marker. if there's no

-// marker, return 0xff, which is never a valid marker value

-static stbi_uc stbi__get_marker(stbi__jpeg* j) {

-	stbi_uc x;

-	if (j->marker != STBI__MARKER_none) {

-		x = j->marker;

-		j->marker = STBI__MARKER_none;

-		return x;

-	}

-	x = stbi__get8(j->s);

-	if (x != 0xff)

-		return STBI__MARKER_none;

-	while (x == 0xff)

-		x = stbi__get8(j->s); // consume repeated 0xff fill bytes

-	return x;

-}

-// in each scan, we'll have scan_n components, and the order

-// of the components is specified by order[]

-#define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7)

-// after a restart interval, stbi__jpeg_reset the entropy decoder and

-// the dc prediction

-static void stbi__jpeg_reset(stbi__jpeg* j) {

-	j->code_bits = 0;

-	j->code_buffer = 0;

-	j->nomore = 0;

-	j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;

-	j->marker = STBI__MARKER_none;

-	j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;

-	j->eob_run = 0;

-	// no more than 1<<31 MCUs if no restart_interal? that's plenty safe,

-	// since we don't even allow 1<<30 pixels

-}

-static int stbi__parse_entropy_coded_data(stbi__jpeg* z) {

-	stbi__jpeg_reset(z);

-	if (!z->progressive) {

-		if (z->scan_n == 1) {

-			int i, j;

-			STBI_SIMD_ALIGN(short, data[64]);

-			int n = z->order[0];

-			// non-interleaved data, we just need to process one block at a

-			// time, in trivial scanline order number of blocks to do just

-			// depends on how many actual "pixels" this component has,

-			// independent of interleaved MCU blocking and such

-			int w = (z->img_comp[n].x + 7) >> 3;

-			int h = (z->img_comp[n].y + 7) >> 3;

-			for (j = 0; j < h; ++j) {

-				for (i = 0; i < w; ++i) {

-					int ha = z->img_comp[n].ha;

-					if (!stbi__jpeg_decode_block(z, data, z->huff_dc + z->img_comp[n].hd, z->huff_ac + ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq]))

-						return 0;

-					z->idct_block_kernel(z->img_comp[n].data + z->img_comp[n].w2 * j * 8 + i * 8, z->img_comp[n].w2, data);

-					// every data block is an MCU, so countdown the restart

-					// interval

-					if (--z->todo <= 0) {

-						if (z->code_bits < 24)

-							stbi__grow_buffer_unsafe(z);

-						// if it's NOT a restart, then just bail, so we get

-						// corrupt data rather than no data

-						if (!STBI__RESTART(z->marker))

-							return 1;

-						stbi__jpeg_reset(z);

-					}

-				}

-			}

-			return 1;

-		} else { // interleaved

-			int i, j, k, x, y;

-			STBI_SIMD_ALIGN(short, data[64]);

-			for (j = 0; j < z->img_mcu_y; ++j) {

-				for (i = 0; i < z->img_mcu_x; ++i) {

-					// scan an interleaved mcu... process scan_n components in

-					// order

-					for (k = 0; k < z->scan_n; ++k) {

-						int n = z->order[k];

-						// scan out an mcu's worth of this component; that's

-						// just determined by the basic H and V specified for

-						// the component

-						for (y = 0; y < z->img_comp[n].v; ++y) {

-							for (x = 0; x < z->img_comp[n].h; ++x) {

-								int x2 = (i * z->img_comp[n].h + x) * 8;

-								int y2 = (j * z->img_comp[n].v + y) * 8;

-								int ha = z->img_comp[n].ha;

-								if (!stbi__jpeg_decode_block(z, data, z->huff_dc + z->img_comp[n].hd, z->huff_ac + ha, z->fast_ac[ha], n,

-															 z->dequant[z->img_comp[n].tq]))

-									return 0;

-								z->idct_block_kernel(z->img_comp[n].data + z->img_comp[n].w2 * y2 + x2, z->img_comp[n].w2, data);

-							}

-						}

-					}

-					// after all interleaved components, that's an interleaved

-					// MCU, so now count down the restart interval

-					if (--z->todo <= 0) {

-						if (z->code_bits < 24)

-							stbi__grow_buffer_unsafe(z);

-						if (!STBI__RESTART(z->marker))

-							return 1;

-						stbi__jpeg_reset(z);

-					}

-				}

-			}

-			return 1;

-		}

-	} else {

-		if (z->scan_n == 1) {

-			int i, j;

-			int n = z->order[0];

-			// non-interleaved data, we just need to process one block at a

-			// time, in trivial scanline order number of blocks to do just

-			// depends on how many actual "pixels" this component has,

-			// independent of interleaved MCU blocking and such

-			int w = (z->img_comp[n].x + 7) >> 3;

-			int h = (z->img_comp[n].y + 7) >> 3;

-			for (j = 0; j < h; ++j) {

-				for (i = 0; i < w; ++i) {

-					short* data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);

-					if (z->spec_start == 0) {

-						if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))

-							return 0;

-					} else {

-						int ha = z->img_comp[n].ha;

-						if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))

-							return 0;

-					}

-					// every data block is an MCU, so countdown the restart

-					// interval

-					if (--z->todo <= 0) {

-						if (z->code_bits < 24)

-							stbi__grow_buffer_unsafe(z);

-						if (!STBI__RESTART(z->marker))

-							return 1;

-						stbi__jpeg_reset(z);

-					}

-				}

-			}

-			return 1;

-		} else { // interleaved

-			int i, j, k, x, y;

-			for (j = 0; j < z->img_mcu_y; ++j) {

-				for (i = 0; i < z->img_mcu_x; ++i) {

-					// scan an interleaved mcu... process scan_n components in

-					// order

-					for (k = 0; k < z->scan_n; ++k) {

-						int n = z->order[k];

-						// scan out an mcu's worth of this component; that's

-						// just determined by the basic H and V specified for

-						// the component

-						for (y = 0; y < z->img_comp[n].v; ++y) {

-							for (x = 0; x < z->img_comp[n].h; ++x) {

-								int x2 = (i * z->img_comp[n].h + x);

-								int y2 = (j * z->img_comp[n].v + y);

-								short* data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);

-								if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))

-									return 0;

-							}

-						}

-					}

-					// after all interleaved components, that's an interleaved

-					// MCU, so now count down the restart interval

-					if (--z->todo <= 0) {

-						if (z->code_bits < 24)

-							stbi__grow_buffer_unsafe(z);

-						if (!STBI__RESTART(z->marker))

-							return 1;

-						stbi__jpeg_reset(z);

-					}

-				}

-			}

-			return 1;

-		}

-	}

-}

-static void stbi__jpeg_dequantize(short* data, stbi__uint16* dequant) {

-	int i;

-	for (i = 0; i < 64; ++i)

-		data[i] *= dequant[i];

-}

-static void stbi__jpeg_finish(stbi__jpeg* z) {

-	if (z->progressive) {

-		// dequantize and idct the data

-		int i, j, n;

-		for (n = 0; n < z->s->img_n; ++n) {

-			int w = (z->img_comp[n].x + 7) >> 3;

-			int h = (z->img_comp[n].y + 7) >> 3;

-			for (j = 0; j < h; ++j) {

-				for (i = 0; i < w; ++i) {

-					short* data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);

-					stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);

-					z->idct_block_kernel(z->img_comp[n].data + z->img_comp[n].w2 * j * 8 + i * 8, z->img_comp[n].w2, data);

-				}

-			}

-		}

-	}

-}

-static int stbi__process_marker(stbi__jpeg* z, int m) {

-	int L;

-	switch (m) {

-	case STBI__MARKER_none: // no marker found

-		return stbi__err("expected marker", "Corrupt JPEG");

-	case 0xDD: // DRI - specify restart interval

-		if (stbi__get16be(z->s) != 4)

-			return stbi__err("bad DRI len", "Corrupt JPEG");

-		z->restart_interval = stbi__get16be(z->s);

-		return 1;

-	case 0xDB: // DQT - define quantization table

-		L = stbi__get16be(z->s) - 2;

-		while (L > 0) {

-			int q = stbi__get8(z->s);

-			int p = q >> 4, sixteen = (p != 0);

-			int t = q & 15, i;

-			if (p != 0 && p != 1)

-				return stbi__err("bad DQT type", "Corrupt JPEG");

-			if (t > 3)

-				return stbi__err("bad DQT table", "Corrupt JPEG");

-			for (i = 0; i < 64; ++i)

-				z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));

-			L -= (sixteen ? 129 : 65);

-		}

-		return L == 0;

-	case 0xC4: // DHT - define huffman table

-		L = stbi__get16be(z->s) - 2;

-		while (L > 0) {

-			stbi_uc* v;

-			int sizes[16], i, n = 0;

-			int q = stbi__get8(z->s);

-			int tc = q >> 4;

-			int th = q & 15;

-			if (tc > 1 || th > 3)

-				return stbi__err("bad DHT header", "Corrupt JPEG");

-			for (i = 0; i < 16; ++i) {

-				sizes[i] = stbi__get8(z->s);

-				n += sizes[i];

-			}

-			L -= 17;

-			if (tc == 0) {

-				if (!stbi__build_huffman(z->huff_dc + th, sizes))

-					return 0;

-				v = z->huff_dc[th].values;

-			} else {

-				if (!stbi__build_huffman(z->huff_ac + th, sizes))

-					return 0;

-				v = z->huff_ac[th].values;

-			}

-			for (i = 0; i < n; ++i)

-				v[i] = stbi__get8(z->s);

-			if (tc != 0)

-				stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);

-			L -= n;

-		}

-		return L == 0;

-	}

-	// check for comment block or APP blocks

-	if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {

-		L = stbi__get16be(z->s);

-		if (L < 2) {

-			if (m == 0xFE)

-				return stbi__err("bad COM len", "Corrupt JPEG");

-			else

-				return stbi__err("bad APP len", "Corrupt JPEG");

-		}

-		L -= 2;

-		if (m == 0xE0 && L >= 5) { // JFIF APP0 segment

-			static const unsigned char tag[5] = {'J', 'F', 'I', 'F', '\0'};

-			int ok = 1;

-			int i;

-			for (i = 0; i < 5; ++i)

-				if (stbi__get8(z->s) != tag[i])

-					ok = 0;

-			L -= 5;

-			if (ok)

-				z->jfif = 1;

-		} else if (m == 0xEE && L >= 12) { // Adobe APP14 segment

-			static const unsigned char tag[6] = {'A', 'd', 'o', 'b', 'e', '\0'};

-			int ok = 1;

-			int i;

-			for (i = 0; i < 6; ++i)

-				if (stbi__get8(z->s) != tag[i])

-					ok = 0;

-			L -= 6;

-			if (ok) {

-				stbi__get8(z->s);							 // version

-				stbi__get16be(z->s);						 // flags0

-				stbi__get16be(z->s);						 // flags1

-				z->app14_color_transform = stbi__get8(z->s); // color transform

-				L -= 6;

-			}

-		}

-		stbi__skip(z->s, L);

-		return 1;

-	}

-	return stbi__err("unknown marker", "Corrupt JPEG");

-}

-// after we see SOS

-static int stbi__process_scan_header(stbi__jpeg* z) {

-	int i;

-	int Ls = stbi__get16be(z->s);

-	z->scan_n = stbi__get8(z->s);

-	if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int)z->s->img_n)

-		return stbi__err("bad SOS component count", "Corrupt JPEG");

-	if (Ls != 6 + 2 * z->scan_n)

-		return stbi__err("bad SOS len", "Corrupt JPEG");

-	for (i = 0; i < z->scan_n; ++i) {

-		int id = stbi__get8(z->s), which;

-		int q = stbi__get8(z->s);

-		for (which = 0; which < z->s->img_n; ++which)

-			if (z->img_comp[which].id == id)

-				break;

-		if (which == z->s->img_n)

-			return 0; // no match

-		z->img_comp[which].hd = q >> 4;

-		if (z->img_comp[which].hd > 3)

-			return stbi__err("bad DC huff", "Corrupt JPEG");

-		z->img_comp[which].ha = q & 15;

-		if (z->img_comp[which].ha > 3)

-			return stbi__err("bad AC huff", "Corrupt JPEG");

-		z->order[i] = which;

-	}

-	{

-		int aa;

-		z->spec_start = stbi__get8(z->s);

-		z->spec_end = stbi__get8(z->s); // should be 63, but might be 0

-		aa = stbi__get8(z->s);

-		z->succ_high = (aa >> 4);

-		z->succ_low = (aa & 15);

-		if (z->progressive) {

-			if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)

-				return stbi__err("bad SOS", "Corrupt JPEG");

-		} else {

-			if (z->spec_start != 0)

-				return stbi__err("bad SOS", "Corrupt JPEG");

-			if (z->succ_high != 0 || z->succ_low != 0)

-				return stbi__err("bad SOS", "Corrupt JPEG");

-			z->spec_end = 63;

-		}

-	}

-	return 1;

-}

-static int stbi__free_jpeg_components(stbi__jpeg* z, int ncomp, int why) {

-	int i;

-	for (i = 0; i < ncomp; ++i) {

-		if (z->img_comp[i].raw_data) {

-			STBI_FREE(z->img_comp[i].raw_data);

-			z->img_comp[i].raw_data = NULL;

-			z->img_comp[i].data = NULL;

-		}

-		if (z->img_comp[i].raw_coeff) {

-			STBI_FREE(z->img_comp[i].raw_coeff);

-			z->img_comp[i].raw_coeff = 0;

-			z->img_comp[i].coeff = 0;

-		}

-		if (z->img_comp[i].linebuf) {

-			STBI_FREE(z->img_comp[i].linebuf);

-			z->img_comp[i].linebuf = NULL;

-		}

-	}

-	return why;

-}

-static int stbi__process_frame_header(stbi__jpeg* z, int scan) {

-	stbi__context* s = z->s;

-	int Lf, p, i, q, h_max = 1, v_max = 1, c;

-	Lf = stbi__get16be(s);

-	if (Lf < 11)

-		return stbi__err("bad SOF len", "Corrupt JPEG"); // JPEG

-	p = stbi__get8(s);

-	if (p != 8)

-		return stbi__err("only 8-bit",

-						 "JPEG format not supported: 8-bit only"); // JPEG baseline

-	s->img_y = stbi__get16be(s);

-	if (s->img_y == 0)

-		return stbi__err("no header height",

-						 "JPEG format not supported: delayed height"); // Legal, but we don't

-																	   // handle it--but

-																	   // neither does IJG

-	s->img_x = stbi__get16be(s);

-	if (s->img_x == 0)

-		return stbi__err("0 width", "Corrupt JPEG"); // JPEG requires

-	c = stbi__get8(s);

-	if (c != 3 && c != 1 && c != 4)

-		return stbi__err("bad component count", "Corrupt JPEG");

-	s->img_n = c;

-	for (i = 0; i < c; ++i) {

-		z->img_comp[i].data = NULL;

-		z->img_comp[i].linebuf = NULL;

-	}

-	if (Lf != 8 + 3 * s->img_n)

-		return stbi__err("bad SOF len", "Corrupt JPEG");

-	z->rgb = 0;

-	for (i = 0; i < s->img_n; ++i) {

-		static const unsigned char rgb[3] = {'R', 'G', 'B'};

-		z->img_comp[i].id = stbi__get8(s);

-		if (s->img_n == 3 && z->img_comp[i].id == rgb[i])

-			++z->rgb;

-		q = stbi__get8(s);

-		z->img_comp[i].h = (q >> 4);

-		if (!z->img_comp[i].h || z->img_comp[i].h > 4)

-			return stbi__err("bad H", "Corrupt JPEG");

-		z->img_comp[i].v = q & 15;

-		if (!z->img_comp[i].v || z->img_comp[i].v > 4)

-			return stbi__err("bad V", "Corrupt JPEG");

-		z->img_comp[i].tq = stbi__get8(s);

-		if (z->img_comp[i].tq > 3)

-			return stbi__err("bad TQ", "Corrupt JPEG");

-	}

-	if (scan != STBI__SCAN_load)

-		return 1;

-	if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0))

-		return stbi__err("too large", "Image too large to decode");

-	for (i = 0; i < s->img_n; ++i) {

-		if (z->img_comp[i].h > h_max)

-			h_max = z->img_comp[i].h;

-		if (z->img_comp[i].v > v_max)

-			v_max = z->img_comp[i].v;

-	}

-	// compute interleaved mcu info

-	z->img_h_max = h_max;

-	z->img_v_max = v_max;

-	z->img_mcu_w = h_max * 8;

-	z->img_mcu_h = v_max * 8;

-	// these sizes can't be more than 17 bits

-	z->img_mcu_x = (s->img_x + z->img_mcu_w - 1) / z->img_mcu_w;

-	z->img_mcu_y = (s->img_y + z->img_mcu_h - 1) / z->img_mcu_h;

-	for (i = 0; i < s->img_n; ++i) {

-		// number of effective pixels (e.g. for non-interleaved MCU)

-		z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max - 1) / h_max;

-		z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max - 1) / v_max;

-		// to simplify generation, we'll allocate enough memory to decode

-		// the bogus oversized data from using interleaved MCUs and their

-		// big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't

-		// discard the extra data until colorspace conversion

-		//

-		// img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked

-		// earlier) so these muls can't overflow with 32-bit ints (which we

-		// require)

-		z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;

-		z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;

-		z->img_comp[i].coeff = 0;

-		z->img_comp[i].raw_coeff = 0;

-		z->img_comp[i].linebuf = NULL;

-		z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);

-		if (z->img_comp[i].raw_data == NULL)

-			return stbi__free_jpeg_components(z, i + 1, stbi__err("outofmem", "Out of memory"));

-		// align blocks for idct using mmx/sse

-		z->img_comp[i].data = (stbi_uc*)(((size_t)z->img_comp[i].raw_data + 15) & ~15);

-		if (z->progressive) {

-			// w2, h2 are multiples of 8 (see above)

-			z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;

-			z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;

-			z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);

-			if (z->img_comp[i].raw_coeff == NULL)

-				return stbi__free_jpeg_components(z, i + 1, stbi__err("outofmem", "Out of memory"));

-			z->img_comp[i].coeff = (short*)(((size_t)z->img_comp[i].raw_coeff + 15) & ~15);

-		}

-	}

-	return 1;

-}

-// use comparisons since in some cases we handle more than one case (e.g. SOF)

-#define stbi__DNL(x) ((x) == 0xdc)

-#define stbi__SOI(x) ((x) == 0xd8)

-#define stbi__EOI(x) ((x) == 0xd9)

-#define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)

-#define stbi__SOS(x) ((x) == 0xda)

-#define stbi__SOF_progressive(x) ((x) == 0xc2)

-static int stbi__decode_jpeg_header(stbi__jpeg* z, int scan) {

-	int m;

-	z->jfif = 0;

-	z->app14_color_transform = -1; // valid values are 0,1,2

-	z->marker = STBI__MARKER_none; // initialize cached marker to empty

-	m = stbi__get_marker(z);

-	if (!stbi__SOI(m))

-		return stbi__err("no SOI", "Corrupt JPEG");

-	if (scan == STBI__SCAN_type)

-		return 1;

-	m = stbi__get_marker(z);

-	while (!stbi__SOF(m)) {

-		if (!stbi__process_marker(z, m))

-			return 0;

-		m = stbi__get_marker(z);

-		while (m == STBI__MARKER_none) {

-			// some files have extra padding after their blocks, so ok, we'll

-			// scan

-			if (stbi__at_eof(z->s))

-				return stbi__err("no SOF", "Corrupt JPEG");

-			m = stbi__get_marker(z);

-		}

-	}

-	z->progressive = stbi__SOF_progressive(m);

-	if (!stbi__process_frame_header(z, scan))

-		return 0;

-	return 1;

-}

-// decode image to YCbCr format

-static int stbi__decode_jpeg_image(stbi__jpeg* j) {

-	int m;

-	for (m = 0; m < 4; m++) {

-		j->img_comp[m].raw_data = NULL;

-		j->img_comp[m].raw_coeff = NULL;

-	}

-	j->restart_interval = 0;

-	if (!stbi__decode_jpeg_header(j, STBI__SCAN_load))

-		return 0;

-	m = stbi__get_marker(j);

-	while (!stbi__EOI(m)) {

-		if (stbi__SOS(m)) {

-			if (!stbi__process_scan_header(j))

-				return 0;

-			if (!stbi__parse_entropy_coded_data(j))

-				return 0;

-			if (j->marker == STBI__MARKER_none) {

-				// handle 0s at the end of image data from IP Kamera 9060

-				while (!stbi__at_eof(j->s)) {

-					int x = stbi__get8(j->s);

-					if (x == 255) {

-						j->marker = stbi__get8(j->s);

-						break;

-					}

-				}

-				// if we reach eof without hitting a marker, stbi__get_marker()

-				// below will fail and we'll eventually return 0

-			}

-		} else if (stbi__DNL(m)) {

-			int Ld = stbi__get16be(j->s);

-			stbi__uint32 NL = stbi__get16be(j->s);

-			if (Ld != 4)

-				return stbi__err("bad DNL len", "Corrupt JPEG");

-			if (NL != j->s->img_y)

-				return stbi__err("bad DNL height", "Corrupt JPEG");

-		} else {

-			if (!stbi__process_marker(j, m))

-				return 0;

-		}

-		m = stbi__get_marker(j);

-	}

-	if (j->progressive)

-		stbi__jpeg_finish(j);

-	return 1;

-}

-// static jfif-centered resampling (across block boundaries)

-typedef stbi_uc* (*resample_row_func)(stbi_uc* out, stbi_uc* in0, stbi_uc* in1, int w, int hs);

-#define stbi__div4(x) ((stbi_uc)((x) >> 2))

-static stbi_uc* resample_row_1(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs) {

-	STBI_NOTUSED(out);

-	STBI_NOTUSED(in_far);

-	STBI_NOTUSED(w);

-	STBI_NOTUSED(hs);

-	return in_near;

-}

-static stbi_uc* stbi__resample_row_v_2(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs) {

-	// need to generate two samples vertically for every one in input

-	int i;

-	STBI_NOTUSED(hs);

-	for (i = 0; i < w; ++i)

-		out[i] = stbi__div4(3 * in_near[i] + in_far[i] + 2);

-	return out;

-}

-static stbi_uc* stbi__resample_row_h_2(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs) {

-	// need to generate two samples horizontally for every one in input

-	int i;

-	stbi_uc* input = in_near;

-	if (w == 1) {

-		// if only one sample, can't do any interpolation

-		out[0] = out[1] = input[0];

-		return out;

-	}

-	out[0] = input[0];

-	out[1] = stbi__div4(input[0] * 3 + input[1] + 2);

-	for (i = 1; i < w - 1; ++i) {

-		int n = 3 * input[i] + 2;

-		out[i * 2 + 0] = stbi__div4(n + input[i - 1]);

-		out[i * 2 + 1] = stbi__div4(n + input[i + 1]);

-	}

-	out[i * 2 + 0] = stbi__div4(input[w - 2] * 3 + input[w - 1] + 2);

-	out[i * 2 + 1] = input[w - 1];

-	STBI_NOTUSED(in_far);

-	STBI_NOTUSED(hs);

-	return out;

-}

-#define stbi__div16(x) ((stbi_uc)((x) >> 4))

-static stbi_uc* stbi__resample_row_hv_2(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs) {

-	// need to generate 2x2 samples for every one in input

-	int i, t0, t1;

-	if (w == 1) {

-		out[0] = out[1] = stbi__div4(3 * in_near[0] + in_far[0] + 2);

-		return out;

-	}

-	t1 = 3 * in_near[0] + in_far[0];

-	out[0] = stbi__div4(t1 + 2);

-	for (i = 1; i < w; ++i) {

-		t0 = t1;

-		t1 = 3 * in_near[i] + in_far[i];

-		out[i * 2 - 1] = stbi__div16(3 * t0 + t1 + 8);

-		out[i * 2] = stbi__div16(3 * t1 + t0 + 8);

-	}

-	out[w * 2 - 1] = stbi__div4(t1 + 2);

-	STBI_NOTUSED(hs);

-	return out;

-}

-#if defined(STBI_SSE2) || defined(STBI_NEON)

-static stbi_uc* stbi__resample_row_hv_2_simd(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs) {

-	// need to generate 2x2 samples for every one in input

-	int i = 0, t0, t1;

-	if (w == 1) {

-		out[0] = out[1] = stbi__div4(3 * in_near[0] + in_far[0] + 2);

-		return out;

-	}

-	t1 = 3 * in_near[0] + in_far[0];

-	// process groups of 8 pixels for as long as we can.

-	// note we can't handle the last pixel in a row in this loop

-	// because we need to handle the filter boundary conditions.

-	for (; i < ((w - 1) & ~7); i += 8) {

-#if defined(STBI_SSE2)

-		// load and perform the vertical filtering pass

-		// this uses 3*x + y = 4*x + (y - x)

-		__m128i zero = _mm_setzero_si128();

-		__m128i farb = _mm_loadl_epi64((__m128i*)(in_far + i));

-		__m128i nearb = _mm_loadl_epi64((__m128i*)(in_near + i));

-		__m128i farw = _mm_unpacklo_epi8(farb, zero);

-		__m128i nearw = _mm_unpacklo_epi8(nearb, zero);

-		__m128i diff = _mm_sub_epi16(farw, nearw);

-		__m128i nears = _mm_slli_epi16(nearw, 2);

-		__m128i curr = _mm_add_epi16(nears, diff); // current row

-		// horizontal filter works the same based on shifted vers of current

-		// row. "prev" is current row shifted right by 1 pixel; we need to

-		// insert the previous pixel value (from t1).

-		// "next" is current row shifted left by 1 pixel, with first pixel

-		// of next block of 8 pixels added in.

-		__m128i prv0 = _mm_slli_si128(curr, 2);

-		__m128i nxt0 = _mm_srli_si128(curr, 2);

-		__m128i prev = _mm_insert_epi16(prv0, t1, 0);

-		__m128i next = _mm_insert_epi16(nxt0, 3 * in_near[i + 8] + in_far[i + 8], 7);

-		// horizontal filter, polyphase implementation since it's convenient:

-		// even pixels = 3*cur + prev = cur*4 + (prev - cur)

-		// odd  pixels = 3*cur + next = cur*4 + (next - cur)

-		// note the shared term.

-		__m128i bias = _mm_set1_epi16(8);

-		__m128i curs = _mm_slli_epi16(curr, 2);

-		__m128i prvd = _mm_sub_epi16(prev, curr);

-		__m128i nxtd = _mm_sub_epi16(next, curr);

-		__m128i curb = _mm_add_epi16(curs, bias);

-		__m128i even = _mm_add_epi16(prvd, curb);

-		__m128i odd = _mm_add_epi16(nxtd, curb);

-		// interleave even and odd pixels, then undo scaling.

-		__m128i int0 = _mm_unpacklo_epi16(even, odd);

-		__m128i int1 = _mm_unpackhi_epi16(even, odd);

-		__m128i de0 = _mm_srli_epi16(int0, 4);

-		__m128i de1 = _mm_srli_epi16(int1, 4);

-		// pack and write output

-		__m128i outv = _mm_packus_epi16(de0, de1);

-		_mm_storeu_si128((__m128i*)(out + i * 2), outv);

-#elif defined(STBI_NEON)

-		// load and perform the vertical filtering pass

-		// this uses 3*x + y = 4*x + (y - x)

-		uint8x8_t farb = vld1_u8(in_far + i);

-		uint8x8_t nearb = vld1_u8(in_near + i);

-		int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));

-		int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));

-		int16x8_t curr = vaddq_s16(nears, diff); // current row

-		// horizontal filter works the same based on shifted vers of current

-		// row. "prev" is current row shifted right by 1 pixel; we need to

-		// insert the previous pixel value (from t1).

-		// "next" is current row shifted left by 1 pixel, with first pixel

-		// of next block of 8 pixels added in.

-		int16x8_t prv0 = vextq_s16(curr, curr, 7);

-		int16x8_t nxt0 = vextq_s16(curr, curr, 1);

-		int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);

-		int16x8_t next = vsetq_lane_s16(3 * in_near[i + 8] + in_far[i + 8], nxt0, 7);

-		// horizontal filter, polyphase implementation since it's convenient:

-		// even pixels = 3*cur + prev = cur*4 + (prev - cur)

-		// odd  pixels = 3*cur + next = cur*4 + (next - cur)

-		// note the shared term.

-		int16x8_t curs = vshlq_n_s16(curr, 2);

-		int16x8_t prvd = vsubq_s16(prev, curr);

-		int16x8_t nxtd = vsubq_s16(next, curr);

-		int16x8_t even = vaddq_s16(curs, prvd);

-		int16x8_t odd = vaddq_s16(curs, nxtd);

-		// undo scaling and round, then store with even/odd phases interleaved

-		uint8x8x2_t o;

-		o.val[0] = vqrshrun_n_s16(even, 4);

-		o.val[1] = vqrshrun_n_s16(odd, 4);

-		vst2_u8(out + i * 2, o);

-#endif

-		// "previous" value for next iter

-		t1 = 3 * in_near[i + 7] + in_far[i + 7];

-	}

-	t0 = t1;

-	t1 = 3 * in_near[i] + in_far[i];

-	out[i * 2] = stbi__div16(3 * t1 + t0 + 8);

-	for (++i; i < w; ++i) {

-		t0 = t1;

-		t1 = 3 * in_near[i] + in_far[i];

-		out[i * 2 - 1] = stbi__div16(3 * t0 + t1 + 8);

-		out[i * 2] = stbi__div16(3 * t1 + t0 + 8);

-	}

-	out[w * 2 - 1] = stbi__div4(t1 + 2);

-	STBI_NOTUSED(hs);

-	return out;

-}

-#endif

-static stbi_uc* stbi__resample_row_generic(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs) {

-	// resample with nearest-neighbor

-	int i, j;

-	STBI_NOTUSED(in_far);

-	for (i = 0; i < w; ++i)

-		for (j = 0; j < hs; ++j)

-			out[i * hs + j] = in_near[i];

-	return out;

-}

-// this is a reduced-precision calculation of YCbCr-to-RGB introduced

-// to make sure the code produces the same results in both SIMD and scalar

-#define stbi__float2fixed(x) (((int)((x)*4096.0f + 0.5f)) << 8)

-static void stbi__YCbCr_to_RGB_row(stbi_uc* out, const stbi_uc* y, const stbi_uc* pcb, const stbi_uc* pcr, int count, int step) {

-	int i;

-	for (i = 0; i < count; ++i) {

-		int y_fixed = (y[i] << 20) + (1 << 19); // rounding

-		int r, g, b;

-		int cr = pcr[i] - 128;

-		int cb = pcb[i] - 128;

-		r = y_fixed + cr * stbi__float2fixed(1.40200f);

-		g = y_fixed + (cr * -stbi__float2fixed(0.71414f)) + ((cb * -stbi__float2fixed(0.34414f)) & 0xffff0000);

-		b = y_fixed + cb * stbi__float2fixed(1.77200f);

-		r >>= 20;

-		g >>= 20;

-		b >>= 20;

-		if ((unsigned)r > 255) {

-			if (r < 0)

-				r = 0;

-			else

-				r = 255;

-		}

-		if ((unsigned)g > 255) {

-			if (g < 0)

-				g = 0;

-			else

-				g = 255;

-		}

-		if ((unsigned)b > 255) {

-			if (b < 0)

-				b = 0;

-			else

-				b = 255;

-		}

-		out[0] = (stbi_uc)r;

-		out[1] = (stbi_uc)g;

-		out[2] = (stbi_uc)b;

-		out[3] = 255;

-		out += step;

-	}

-}

-#if defined(STBI_SSE2) || defined(STBI_NEON)

-static void stbi__YCbCr_to_RGB_simd(stbi_uc* out, stbi_uc const* y, stbi_uc const* pcb, stbi_uc const* pcr, int count, int step) {

-	int i = 0;

-#ifdef STBI_SSE2

-	// step == 3 is pretty ugly on the final interleave, and i'm not convinced

-	// it's useful in practice (you wouldn't use it for textures, for example).

-	// so just accelerate step == 4 case.

-	if (step == 4) {

-		// this is a fairly straightforward implementation and not

-		// super-optimized.

-		__m128i signflip = _mm_set1_epi8(-0x80);

-		__m128i cr_const0 = _mm_set1_epi16((short)(1.40200f * 4096.0f + 0.5f));

-		__m128i cr_const1 = _mm_set1_epi16(-(short)(0.71414f * 4096.0f + 0.5f));

-		__m128i cb_const0 = _mm_set1_epi16(-(short)(0.34414f * 4096.0f + 0.5f));

-		__m128i cb_const1 = _mm_set1_epi16((short)(1.77200f * 4096.0f + 0.5f));

-		__m128i y_bias = _mm_set1_epi8((char)(unsigned char)128);

-		__m128i xw = _mm_set1_epi16(255); // alpha channel

-		for (; i + 7 < count; i += 8) {

-			// load

-			__m128i y_bytes = _mm_loadl_epi64((__m128i*)(y + i));

-			__m128i cr_bytes = _mm_loadl_epi64((__m128i*)(pcr + i));

-			__m128i cb_bytes = _mm_loadl_epi64((__m128i*)(pcb + i));

-			__m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128

-			__m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128

-			// unpack to short (and left-shift cr, cb by 8)

-			__m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes);

-			__m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);

-			__m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);

-			// color transform

-			__m128i yws = _mm_srli_epi16(yw, 4);

-			__m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);

-			__m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);

-			__m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);

-			__m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);

-			__m128i rws = _mm_add_epi16(cr0, yws);

-			__m128i gwt = _mm_add_epi16(cb0, yws);

-			__m128i bws = _mm_add_epi16(yws, cb1);

-			__m128i gws = _mm_add_epi16(gwt, cr1);

-			// descale

-			__m128i rw = _mm_srai_epi16(rws, 4);

-			__m128i bw = _mm_srai_epi16(bws, 4);

-			__m128i gw = _mm_srai_epi16(gws, 4);

-			// back to byte, set up for transpose

-			__m128i brb = _mm_packus_epi16(rw, bw);

-			__m128i gxb = _mm_packus_epi16(gw, xw);

-			// transpose to interleave channels

-			__m128i t0 = _mm_unpacklo_epi8(brb, gxb);

-			__m128i t1 = _mm_unpackhi_epi8(brb, gxb);

-			__m128i o0 = _mm_unpacklo_epi16(t0, t1);

-			__m128i o1 = _mm_unpackhi_epi16(t0, t1);

-			// store

-			_mm_storeu_si128((__m128i*)(out + 0), o0);

-			_mm_storeu_si128((__m128i*)(out + 16), o1);

-			out += 32;

-		}

-	}

-#endif

-#ifdef STBI_NEON

-	// in this version, step=3 support would be easy to add. but is there

-	// demand?

-	if (step == 4) {

-		// this is a fairly straightforward implementation and not

-		// super-optimized.

-		uint8x8_t signflip = vdup_n_u8(0x80);

-		int16x8_t cr_const0 = vdupq_n_s16((short)(1.40200f * 4096.0f + 0.5f));

-		int16x8_t cr_const1 = vdupq_n_s16(-(short)(0.71414f * 4096.0f + 0.5f));

-		int16x8_t cb_const0 = vdupq_n_s16(-(short)(0.34414f * 4096.0f + 0.5f));

-		int16x8_t cb_const1 = vdupq_n_s16((short)(1.77200f * 4096.0f + 0.5f));

-		for (; i + 7 < count; i += 8) {

-			// load

-			uint8x8_t y_bytes = vld1_u8(y + i);

-			uint8x8_t cr_bytes = vld1_u8(pcr + i);

-			uint8x8_t cb_bytes = vld1_u8(pcb + i);

-			int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));

-			int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));

-			// expand to s16

-			int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));

-			int16x8_t crw = vshll_n_s8(cr_biased, 7);

-			int16x8_t cbw = vshll_n_s8(cb_biased, 7);

-			// color transform

-			int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);

-			int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);

-			int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);

-			int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);

-			int16x8_t rws = vaddq_s16(yws, cr0);

-			int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);

-			int16x8_t bws = vaddq_s16(yws, cb1);

-			// undo scaling, round, convert to byte

-			uint8x8x4_t o;

-			o.val[0] = vqrshrun_n_s16(rws, 4);

-			o.val[1] = vqrshrun_n_s16(gws, 4);

-			o.val[2] = vqrshrun_n_s16(bws, 4);

-			o.val[3] = vdup_n_u8(255);

-			// store, interleaving r/g/b/a

-			vst4_u8(out, o);

-			out += 8 * 4;

-		}

-	}

-#endif

-	for (; i < count; ++i) {

-		int y_fixed = (y[i] << 20) + (1 << 19); // rounding

-		int r, g, b;

-		int cr = pcr[i] - 128;

-		int cb = pcb[i] - 128;

-		r = y_fixed + cr * stbi__float2fixed(1.40200f);

-		g = y_fixed + cr * -stbi__float2fixed(0.71414f) + ((cb * -stbi__float2fixed(0.34414f)) & 0xffff0000);

-		b = y_fixed + cb * stbi__float2fixed(1.77200f);

-		r >>= 20;

-		g >>= 20;

-		b >>= 20;

-		if ((unsigned)r > 255) {

-			if (r < 0)

-				r = 0;

-			else

-				r = 255;

-		}

-		if ((unsigned)g > 255) {

-			if (g < 0)

-				g = 0;

-			else

-				g = 255;

-		}

-		if ((unsigned)b > 255) {

-			if (b < 0)

-				b = 0;

-			else

-				b = 255;

-		}

-		out[0] = (stbi_uc)r;

-		out[1] = (stbi_uc)g;

-		out[2] = (stbi_uc)b;

-		out[3] = 255;

-		out += step;

-	}

-}

-#endif

-// set up the kernels

-static void stbi__setup_jpeg(stbi__jpeg* j) {

-	j->idct_block_kernel = stbi__idct_block;

-	j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;

-	j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;

-#ifdef STBI_SSE2

-	if (stbi__sse2_available()) {

-		j->idct_block_kernel = stbi__idct_simd;

-		j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;

-		j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;

-	}

-#endif

-#ifdef STBI_NEON

-	j->idct_block_kernel = stbi__idct_simd;

-	j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;

-	j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;

-#endif

-}

-// clean up the temporary component buffers

-static void stbi__cleanup_jpeg(stbi__jpeg* j) { stbi__free_jpeg_components(j, j->s->img_n, 0); }

-typedef struct {

-	resample_row_func resample;

-	stbi_uc *line0, *line1;

-	int hs, vs;  // expansion factor in each axis

-	int w_lores; // horizontal pixels pre-expansion

-	int ystep;   // how far through vertical expansion we are

-	int ypos;	// which pre-expansion row we're on

-} stbi__resample;

-// fast 0..255 * 0..255 => 0..255 rounded multiplication

-static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y) {

-	unsigned int t = x * y + 128;

-	return (stbi_uc)((t + (t >> 8)) >> 8);

-}

-static stbi_uc* load_jpeg_image(stbi__jpeg* z, int* out_x, int* out_y, int* comp, int req_comp) {

-	int n, decode_n, is_rgb;

-	z->s->img_n = 0; // make stbi__cleanup_jpeg safe

-	// validate req_comp

-	if (req_comp < 0 || req_comp > 4)

-		return stbi__errpuc("bad req_comp", "Internal error");

-	// load a jpeg image from whichever source, but leave in YCbCr format

-	if (!stbi__decode_jpeg_image(z)) {

-		stbi__cleanup_jpeg(z);

-		return NULL;

-	}

-	// determine actual number of components to generate

-	n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;

-	is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));

-	if (z->s->img_n == 3 && n < 3 && !is_rgb)

-		decode_n = 1;

-	else

-		decode_n = z->s->img_n;

-	// resample and color-convert

-	{

-		int k;

-		unsigned int i, j;

-		stbi_uc* output;

-		stbi_uc* coutput[4] = {NULL, NULL, NULL, NULL};

-		stbi__resample res_comp[4];

-		for (k = 0; k < decode_n; ++k) {

-			stbi__resample* r = &res_comp[k];

-			// allocate line buffer big enough for upsampling off the edges

-			// with upsample factor of 4

-			z->img_comp[k].linebuf = (stbi_uc*)stbi__malloc(z->s->img_x + 3);

-			if (!z->img_comp[k].linebuf) {

-				stbi__cleanup_jpeg(z);

-				return stbi__errpuc("outofmem", "Out of memory");

-			}

-			r->hs = z->img_h_max / z->img_comp[k].h;

-			r->vs = z->img_v_max / z->img_comp[k].v;

-			r->ystep = r->vs >> 1;

-			r->w_lores = (z->s->img_x + r->hs - 1) / r->hs;

-			r->ypos = 0;

-			r->line0 = r->line1 = z->img_comp[k].data;

-			if (r->hs == 1 && r->vs == 1)

-				r->resample = resample_row_1;

-			else if (r->hs == 1 && r->vs == 2)

-				r->resample = stbi__resample_row_v_2;

-			else if (r->hs == 2 && r->vs == 1)

-				r->resample = stbi__resample_row_h_2;

-			else if (r->hs == 2 && r->vs == 2)

-				r->resample = z->resample_row_hv_2_kernel;

-			else

-				r->resample = stbi__resample_row_generic;

-		}

-		// can't error after this so, this is safe

-		output = (stbi_uc*)stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);

-		if (!output) {

-			stbi__cleanup_jpeg(z);

-			return stbi__errpuc("outofmem", "Out of memory");

-		}

-		// now go ahead and resample

-		for (j = 0; j < z->s->img_y; ++j) {

-			stbi_uc* out = output + n * z->s->img_x * j;

-			for (k = 0; k < decode_n; ++k) {

-				stbi__resample* r = &res_comp[k];

-				int y_bot = r->ystep >= (r->vs >> 1);

-				coutput[k] = r->resample(z->img_comp[k].linebuf, y_bot ? r->line1 : r->line0, y_bot ? r->line0 : r->line1, r->w_lores, r->hs);

-				if (++r->ystep >= r->vs) {

-					r->ystep = 0;

-					r->line0 = r->line1;

-					if (++r->ypos < z->img_comp[k].y)

-						r->line1 += z->img_comp[k].w2;

-				}

-			}

-			if (n >= 3) {

-				stbi_uc* y = coutput[0];

-				if (z->s->img_n == 3) {

-					if (is_rgb) {

-						for (i = 0; i < z->s->img_x; ++i) {

-							out[0] = y[i];

-							out[1] = coutput[1][i];

-							out[2] = coutput[2][i];

-							out[3] = 255;

-							out += n;

-						}

-					} else {

-						z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);

-					}

-				} else if (z->s->img_n == 4) {

-					if (z->app14_color_transform == 0) { // CMYK

-						for (i = 0; i < z->s->img_x; ++i) {

-							stbi_uc m = coutput[3][i];

-							out[0] = stbi__blinn_8x8(coutput[0][i], m);

-							out[1] = stbi__blinn_8x8(coutput[1][i], m);

-							out[2] = stbi__blinn_8x8(coutput[2][i], m);

-							out[3] = 255;

-							out += n;

-						}

-					} else if (z->app14_color_transform == 2) { // YCCK

-						z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);

-						for (i = 0; i < z->s->img_x; ++i) {

-							stbi_uc m = coutput[3][i];

-							out[0] = stbi__blinn_8x8(255 - out[0], m);

-							out[1] = stbi__blinn_8x8(255 - out[1], m);

-							out[2] = stbi__blinn_8x8(255 - out[2], m);

-							out += n;

-						}

-					} else { // YCbCr + alpha?  Ignore the fourth channel for

-							 // now

-						z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);

-					}

-				} else

-					for (i = 0; i < z->s->img_x; ++i) {

-						out[0] = out[1] = out[2] = y[i];

-						out[3] = 255; // not used if n==3

-						out += n;

-					}

-			} else {

-				if (is_rgb) {

-					if (n == 1)

-						for (i = 0; i < z->s->img_x; ++i)

-							*out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);

-					else {

-						for (i = 0; i < z->s->img_x; ++i, out += 2) {

-							out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);

-							out[1] = 255;

-						}

-					}

-				} else if (z->s->img_n == 4 && z->app14_color_transform == 0) {

-					for (i = 0; i < z->s->img_x; ++i) {

-						stbi_uc m = coutput[3][i];

-						stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);

-						stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);

-						stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);

-						out[0] = stbi__compute_y(r, g, b);

-						out[1] = 255;

-						out += n;

-					}

-				} else if (z->s->img_n == 4 && z->app14_color_transform == 2) {

-					for (i = 0; i < z->s->img_x; ++i) {

-						out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);

-						out[1] = 255;

-						out += n;

-					}

-				} else {

-					stbi_uc* y = coutput[0];

-					if (n == 1)

-						for (i = 0; i < z->s->img_x; ++i)

-							out[i] = y[i];

-					else

-						for (i = 0; i < z->s->img_x; ++i) {

-							*out++ = y[i];

-							*out++ = 255;

-						}

-				}

-			}

-		}

-		stbi__cleanup_jpeg(z);

-		*out_x = z->s->img_x;

-		*out_y = z->s->img_y;

-		if (comp)

-			*comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output

-		return output;

-	}

-}

-static void* stbi__jpeg_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri) {

-	unsigned char* result;

-	stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));

-	STBI_NOTUSED(ri);

-	j->s = s;

-	stbi__setup_jpeg(j);

-	result = load_jpeg_image(j, x, y, comp, req_comp);

-	STBI_FREE(j);

-	return result;

-}

-static int stbi__jpeg_test(stbi__context* s) {

-	int r;

-	stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));

-	j->s = s;

-	stbi__setup_jpeg(j);

-	r = stbi__decode_jpeg_header(j, STBI__SCAN_type);

-	stbi__rewind(s);

-	STBI_FREE(j);

-	return r;

-}

-static int stbi__jpeg_info_raw(stbi__jpeg* j, int* x, int* y, int* comp) {

-	if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) {

-		stbi__rewind(j->s);

-		return 0;

-	}

-	if (x)

-		*x = j->s->img_x;

-	if (y)

-		*y = j->s->img_y;

-	if (comp)

-		*comp = j->s->img_n >= 3 ? 3 : 1;

-	return 1;

-}

-static int stbi__jpeg_info(stbi__context* s, int* x, int* y, int* comp) {

-	int result;

-	stbi__jpeg* j = (stbi__jpeg*)(stbi__malloc(sizeof(stbi__jpeg)));

-	j->s = s;

-	result = stbi__jpeg_info_raw(j, x, y, comp);

-	STBI_FREE(j);

-	return result;

-}

-#endif

-// public domain zlib decode    v0.2  Sean Barrett 2006-11-18

-//    simple implementation

-//      - all input must be provided in an upfront buffer

-//      - all output is written to a single output buffer (can malloc/realloc)

-//    performance

-//      - fast huffman

-#ifndef STBI_NO_ZLIB

-// fast-way is faster to check than jpeg huffman, but slow way is slower

-#define STBI__ZFAST_BITS 9 // accelerate all cases in default tables

-#define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1)

-// zlib-style huffman encoding

-// (jpegs packs from left, zlib from right, so can't share code)

-typedef struct {

-	stbi__uint16 fast[1 << STBI__ZFAST_BITS];

-	stbi__uint16 firstcode[16];

-	int maxcode[17];

-	stbi__uint16 firstsymbol[16];

-	stbi_uc size[288];

-	stbi__uint16 value[288];

-} stbi__zhuffman;

-stbi_inline static int stbi__bitreverse16(int n) {

-	n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1);

-	n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2);

-	n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4);

-	n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);

-	return n;

-}

-stbi_inline static int stbi__bit_reverse(int v, int bits) {

-	STBI_ASSERT(bits <= 16);

-	// to bit reverse n bits, reverse 16 and shift

-	// e.g. 11 bits, bit reverse and shift away 5

-	return stbi__bitreverse16(v) >> (16 - bits);

-}

-static int stbi__zbuild_huffman(stbi__zhuffman* z, const stbi_uc* sizelist, int num) {

-	int i, k = 0;

-	int code, next_code[16], sizes[17];

-	// DEFLATE spec for generating codes

-	memset(sizes, 0, sizeof(sizes));

-	memset(z->fast, 0, sizeof(z->fast));

-	for (i = 0; i < num; ++i)

-		++sizes[sizelist[i]];

-	sizes[0] = 0;

-	for (i = 1; i < 16; ++i)

-		if (sizes[i] > (1 << i))

-			return stbi__err("bad sizes", "Corrupt PNG");

-	code = 0;

-	for (i = 1; i < 16; ++i) {

-		next_code[i] = code;

-		z->firstcode[i] = (stbi__uint16)code;

-		z->firstsymbol[i] = (stbi__uint16)k;

-		code = (code + sizes[i]);

-		if (sizes[i])

-			if (code - 1 >= (1 << i))

-				return stbi__err("bad codelengths", "Corrupt PNG");

-		z->maxcode[i] = code << (16 - i); // preshift for inner loop

-		code <<= 1;

-		k += sizes[i];

-	}

-	z->maxcode[16] = 0x10000; // sentinel

-	for (i = 0; i < num; ++i) {

-		int s = sizelist[i];

-		if (s) {

-			int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];

-			stbi__uint16 fastv = (stbi__uint16)((s << 9) | i);

-			z->size[c] = (stbi_uc)s;

-			z->value[c] = (stbi__uint16)i;

-			if (s <= STBI__ZFAST_BITS) {

-				int j = stbi__bit_reverse(next_code[s], s);

-				while (j < (1 << STBI__ZFAST_BITS)) {

-					z->fast[j] = fastv;

-					j += (1 << s);

-				}

-			}

-			++next_code[s];

-		}

-	}

-	return 1;

-}

-// zlib-from-memory implementation for PNG reading

-//    because PNG allows splitting the zlib stream arbitrarily,

-//    and it's annoying structurally to have PNG call ZLIB call PNG,

-//    we require PNG read all the IDATs and combine them into a single

-//    memory buffer

-typedef struct {

-	stbi_uc *zbuffer, *zbuffer_end;

-	int num_bits;

-	stbi__uint32 code_buffer;

-	char* zout;

-	char* zout_start;

-	char* zout_end;

-	int z_expandable;

-	stbi__zhuffman z_length, z_distance;

-} stbi__zbuf;

-stbi_inline static stbi_uc stbi__zget8(stbi__zbuf* z) {

-	if (z->zbuffer >= z->zbuffer_end)

-		return 0;

-	return *z->zbuffer++;

-}

-static void stbi__fill_bits(stbi__zbuf* z) {

-	do {

-		STBI_ASSERT(z->code_buffer < (1U << z->num_bits));

-		z->code_buffer |= (unsigned int)stbi__zget8(z) << z->num_bits;

-		z->num_bits += 8;

-	} while (z->num_bits <= 24);

-}

-stbi_inline static unsigned int stbi__zreceive(stbi__zbuf* z, int n) {

-	unsigned int k;

-	if (z->num_bits < n)

-		stbi__fill_bits(z);

-	k = z->code_buffer & ((1 << n) - 1);

-	z->code_buffer >>= n;

-	z->num_bits -= n;

-	return k;

-}

-static int stbi__zhuffman_decode_slowpath(stbi__zbuf* a, stbi__zhuffman* z) {

-	int b, s, k;

-	// not resolved by fast table, so compute it the slow way

-	// use jpeg approach, which requires MSbits at top

-	k = stbi__bit_reverse(a->code_buffer, 16);

-	for (s = STBI__ZFAST_BITS + 1;; ++s)

-		if (k < z->maxcode[s])

-			break;

-	if (s == 16)

-		return -1; // invalid code!

-	// code size is s, so:

-	b = (k >> (16 - s)) - z->firstcode[s] + z->firstsymbol[s];

-	STBI_ASSERT(z->size[b] == s);

-	a->code_buffer >>= s;

-	a->num_bits -= s;

-	return z->value[b];

-}

-stbi_inline static int stbi__zhuffman_decode(stbi__zbuf* a, stbi__zhuffman* z) {

-	int b, s;

-	if (a->num_bits < 16)

-		stbi__fill_bits(a);

-	b = z->fast[a->code_buffer & STBI__ZFAST_MASK];

-	if (b) {

-		s = b >> 9;

-		a->code_buffer >>= s;

-		a->num_bits -= s;

-		return b & 511;

-	}

-	return stbi__zhuffman_decode_slowpath(a, z);

-}

-static int stbi__zexpand(stbi__zbuf* z, char* zout,

-						 int n) // need to make room for n bytes

-{

-	char* q;

-	int cur, limit, old_limit;

-	z->zout = zout;

-	if (!z->z_expandable)

-		return stbi__err("output buffer limit", "Corrupt PNG");

-	cur = (int)(z->zout - z->zout_start);

-	limit = old_limit = (int)(z->zout_end - z->zout_start);

-	while (cur + n > limit)

-		limit *= 2;

-	q = (char*)STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);

-	STBI_NOTUSED(old_limit);

-	if (q == NULL)

-		return stbi__err("outofmem", "Out of memory");

-	z->zout_start = q;

-	z->zout = q + cur;

-	z->zout_end = q + limit;

-	return 1;

-}

-static const int stbi__zlength_base[31] = {3,  4,  5,  6,  7,  8,  9,  10,  11,  13,  15,  17,  19,  23, 27, 31,

-										   35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0,  0};

-static const int stbi__zlength_extra[31] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0};

-static const int stbi__zdist_base[32] = {1,   2,   3,   4,   5,	7,	9,	13,   17,   25,   33,   49,	65,	97,	129, 193,

-										 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0,   0};

-static const int stbi__zdist_extra[32] = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13};

-static int stbi__parse_huffman_block(stbi__zbuf* a) {

-	char* zout = a->zout;

-	for (;;) {

-		int z = stbi__zhuffman_decode(a, &a->z_length);

-		if (z < 256) {

-			if (z < 0)

-				return stbi__err("bad huffman code",

-								 "Corrupt PNG"); // error in huffman codes

-			if (zout >= a->zout_end) {

-				if (!stbi__zexpand(a, zout, 1))

-					return 0;

-				zout = a->zout;

-			}

-			*zout++ = (char)z;

-		} else {

-			stbi_uc* p;

-			int len, dist;

-			if (z == 256) {

-				a->zout = zout;

-				return 1;

-			}

-			z -= 257;

-			len = stbi__zlength_base[z];

-			if (stbi__zlength_extra[z])

-				len += stbi__zreceive(a, stbi__zlength_extra[z]);

-			z = stbi__zhuffman_decode(a, &a->z_distance);

-			if (z < 0)

-				return stbi__err("bad huffman code", "Corrupt PNG");

-			dist = stbi__zdist_base[z];

-			if (stbi__zdist_extra[z])

-				dist += stbi__zreceive(a, stbi__zdist_extra[z]);

-			if (zout - a->zout_start < dist)

-				return stbi__err("bad dist", "Corrupt PNG");

-			if (zout + len > a->zout_end) {

-				if (!stbi__zexpand(a, zout, len))

-					return 0;

-				zout = a->zout;

-			}

-			p = (stbi_uc*)(zout - dist);

-			if (dist == 1) { // run of one byte; common in images.

-				stbi_uc v = *p;

-				if (len) {

-					do

-						*zout++ = v;

-					while (--len);

-				}

-			} else {

-				if (len) {

-					do

-						*zout++ = *p++;

-					while (--len);

-				}

-			}

-		}

-	}

-}

-static int stbi__compute_huffman_codes(stbi__zbuf* a) {

-	static const stbi_uc length_dezigzag[19] = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};

-	stbi__zhuffman z_codelength;

-	stbi_uc lencodes[286 + 32 + 137]; // padding for maximum single op

-	stbi_uc codelength_sizes[19];

-	int i, n;

-	int hlit = stbi__zreceive(a, 5) + 257;

-	int hdist = stbi__zreceive(a, 5) + 1;

-	int hclen = stbi__zreceive(a, 4) + 4;

-	int ntot = hlit + hdist;

-	memset(codelength_sizes, 0, sizeof(codelength_sizes));

-	for (i = 0; i < hclen; ++i) {

-		int s = stbi__zreceive(a, 3);

-		codelength_sizes[length_dezigzag[i]] = (stbi_uc)s;

-	}

-	if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19))

-		return 0;

-	n = 0;

-	while (n < ntot) {

-		int c = stbi__zhuffman_decode(a, &z_codelength);

-		if (c < 0 || c >= 19)

-			return stbi__err("bad codelengths", "Corrupt PNG");

-		if (c < 16)

-			lencodes[n++] = (stbi_uc)c;

-		else {

-			stbi_uc fill = 0;

-			if (c == 16) {

-				c = stbi__zreceive(a, 2) + 3;

-				if (n == 0)

-					return stbi__err("bad codelengths", "Corrupt PNG");

-				fill = lencodes[n - 1];

-			} else if (c == 17)

-				c = stbi__zreceive(a, 3) + 3;

-			else {

-				STBI_ASSERT(c == 18);

-				c = stbi__zreceive(a, 7) + 11;

-			}

-			if (ntot - n < c)

-				return stbi__err("bad codelengths", "Corrupt PNG");

-			memset(lencodes + n, fill, c);

-			n += c;

-		}

-	}

-	if (n != ntot)

-		return stbi__err("bad codelengths", "Corrupt PNG");

-	if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit))

-		return 0;

-	if (!stbi__zbuild_huffman(&a->z_distance, lencodes + hlit, hdist))

-		return 0;

-	return 1;

-}

-static int stbi__parse_uncompressed_block(stbi__zbuf* a) {

-	stbi_uc header[4];

-	int len, nlen, k;

-	if (a->num_bits & 7)

-		stbi__zreceive(a, a->num_bits & 7); // discard

-	// drain the bit-packed data into header

-	k = 0;

-	while (a->num_bits > 0) {

-		header[k++] = (stbi_uc)(a->code_buffer & 255); // suppress MSVC run-time check

-		a->code_buffer >>= 8;

-		a->num_bits -= 8;

-	}

-	STBI_ASSERT(a->num_bits == 0);

-	// now fill header the normal way

-	while (k < 4)

-		header[k++] = stbi__zget8(a);

-	len = header[1] * 256 + header[0];

-	nlen = header[3] * 256 + header[2];

-	if (nlen != (len ^ 0xffff))

-		return stbi__err("zlib corrupt", "Corrupt PNG");

-	if (a->zbuffer + len > a->zbuffer_end)

-		return stbi__err("read past buffer", "Corrupt PNG");

-	if (a->zout + len > a->zout_end)

-		if (!stbi__zexpand(a, a->zout, len))

-			return 0;

-	memcpy(a->zout, a->zbuffer, len);

-	a->zbuffer += len;

-	a->zout += len;

-	return 1;

-}

-static int stbi__parse_zlib_header(stbi__zbuf* a) {

-	int cmf = stbi__zget8(a);

-	int cm = cmf & 15;

-	/* int cinfo = cmf >> 4; */

-	int flg = stbi__zget8(a);

-	if ((cmf * 256 + flg) % 31 != 0)

-		return stbi__err("bad zlib header", "Corrupt PNG"); // zlib spec

-	if (flg & 32)

-		return stbi__err("no preset dict",

-						 "Corrupt PNG"); // preset dictionary not allowed in png

-	if (cm != 8)

-		return stbi__err("bad compression",

-						 "Corrupt PNG"); // DEFLATE required for png

-	// window = 1 << (8 + cinfo)... but who cares, we fully buffer output

-	return 1;

-}

-static const stbi_uc stbi__zdefault_length[288] = {

-	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,

-	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,

-	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,

-	9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,

-	9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,

-	9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8};

-static const stbi_uc stbi__zdefault_distance[32] = {5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5};

-/*

-Init algorithm:

-{

-   int i;   // use <= to match clearly with spec

-   for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;

-   for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9;

-   for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7;

-   for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8;

-   for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5;

-}

-*/

-static int stbi__parse_zlib(stbi__zbuf* a, int parse_header) {

-	int final, type;

-	if (parse_header)

-		if (!stbi__parse_zlib_header(a))

-			return 0;

-	a->num_bits = 0;

-	a->code_buffer = 0;

-	do {

-		final = stbi__zreceive(a, 1);

-		type = stbi__zreceive(a, 2);

-		if (type == 0) {

-			if (!stbi__parse_uncompressed_block(a))

-				return 0;

-		} else if (type == 3) {

-			return 0;

-		} else {

-			if (type == 1) {

-				// use fixed code lengths

-				if (!stbi__zbuild_huffman(&a->z_length, stbi__zdefault_length, 288))

-					return 0;

-				if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32))

-					return 0;

-			} else {

-				if (!stbi__compute_huffman_codes(a))

-					return 0;

-			}

-			if (!stbi__parse_huffman_block(a))

-				return 0;

-		}

-	} while (!final);

-	return 1;

-}

-static int stbi__do_zlib(stbi__zbuf* a, char* obuf, int olen, int exp, int parse_header) {

-	a->zout_start = obuf;

-	a->zout = obuf;

-	a->zout_end = obuf + olen;

-	a->z_expandable = exp;

-	return stbi__parse_zlib(a, parse_header);

-}

-STBIDEF char* stbi_zlib_decode_malloc_guesssize(const char* buffer, int len, int initial_size, int* outlen) {

-	stbi__zbuf a;

-	char* p = (char*)stbi__malloc(initial_size);

-	if (p == NULL)

-		return NULL;

-	a.zbuffer = (stbi_uc*)buffer;

-	a.zbuffer_end = (stbi_uc*)buffer + len;

-	if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {

-		if (outlen)

-			*outlen = (int)(a.zout - a.zout_start);

-		return a.zout_start;

-	} else {

-		STBI_FREE(a.zout_start);

-		return NULL;

-	}

-}

-STBIDEF char* stbi_zlib_decode_malloc(char const* buffer, int len, int* outlen) { return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen); }

-STBIDEF char* stbi_zlib_decode_malloc_guesssize_headerflag(const char* buffer, int len, int initial_size, int* outlen, int parse_header) {

-	stbi__zbuf a;

-	char* p = (char*)stbi__malloc(initial_size);

-	if (p == NULL)

-		return NULL;

-	a.zbuffer = (stbi_uc*)buffer;

-	a.zbuffer_end = (stbi_uc*)buffer + len;

-	if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {

-		if (outlen)

-			*outlen = (int)(a.zout - a.zout_start);

-		return a.zout_start;

-	} else {

-		STBI_FREE(a.zout_start);

-		return NULL;

-	}

-}

-STBIDEF int stbi_zlib_decode_buffer(char* obuffer, int olen, char const* ibuffer, int ilen) {

-	stbi__zbuf a;

-	a.zbuffer = (stbi_uc*)ibuffer;

-	a.zbuffer_end = (stbi_uc*)ibuffer + ilen;

-	if (stbi__do_zlib(&a, obuffer, olen, 0, 1))

-		return (int)(a.zout - a.zout_start);

-	else

-		return -1;

-}

-STBIDEF char* stbi_zlib_decode_noheader_malloc(char const* buffer, int len, int* outlen) {

-	stbi__zbuf a;

-	char* p = (char*)stbi__malloc(16384);

-	if (p == NULL)

-		return NULL;

-	a.zbuffer = (stbi_uc*)buffer;

-	a.zbuffer_end = (stbi_uc*)buffer + len;

-	if (stbi__do_zlib(&a, p, 16384, 1, 0)) {

-		if (outlen)

-			*outlen = (int)(a.zout - a.zout_start);

-		return a.zout_start;

-	} else {

-		STBI_FREE(a.zout_start);

-		return NULL;

-	}

-}

-STBIDEF int stbi_zlib_decode_noheader_buffer(char* obuffer, int olen, const char* ibuffer, int ilen) {

-	stbi__zbuf a;

-	a.zbuffer = (stbi_uc*)ibuffer;

-	a.zbuffer_end = (stbi_uc*)ibuffer + ilen;

-	if (stbi__do_zlib(&a, obuffer, olen, 0, 0))

-		return (int)(a.zout - a.zout_start);

-	else

-		return -1;

-}

-#endif

-// public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18

-//    simple implementation

-//      - only 8-bit samples

-//      - no CRC checking

-//      - allocates lots of intermediate memory

-//        - avoids problem of streaming data between subsystems

-//        - avoids explicit window management

-//    performance

-//      - uses stb_zlib, a PD zlib implementation with fast huffman decoding

-#ifndef STBI_NO_PNG

-typedef struct {

-	stbi__uint32 length;

-	stbi__uint32 type;

-} stbi__pngchunk;

-static stbi__pngchunk stbi__get_chunk_header(stbi__context* s) {

-	stbi__pngchunk c;

-	c.length = stbi__get32be(s);

-	c.type = stbi__get32be(s);

-	return c;

-}

-static int stbi__check_png_header(stbi__context* s) {

-	static const stbi_uc png_sig[8] = {137, 80, 78, 71, 13, 10, 26, 10};

-	int i;

-	for (i = 0; i < 8; ++i)

-		if (stbi__get8(s) != png_sig[i])

-			return stbi__err("bad png sig", "Not a PNG");

-	return 1;

-}

-typedef struct {

-	stbi__context* s;

-	stbi_uc *idata, *expanded, *out;

-	int depth;

-} stbi__png;

-enum {

-	STBI__F_none = 0,

-	STBI__F_sub = 1,

-	STBI__F_up = 2,

-	STBI__F_avg = 3,

-	STBI__F_paeth = 4,

-	// synthetic filters used for first scanline to avoid needing a dummy row of

-	// 0s

-	STBI__F_avg_first,

-	STBI__F_paeth_first

-};

-static stbi_uc first_row_filter[5] = {STBI__F_none, STBI__F_sub, STBI__F_none, STBI__F_avg_first, STBI__F_paeth_first};

-static int stbi__paeth(int a, int b, int c) {

-	int p = a + b - c;

-	int pa = abs(p - a);

-	int pb = abs(p - b);

-	int pc = abs(p - c);

-	if (pa <= pb && pa <= pc)

-		return a;

-	if (pb <= pc)

-		return b;

-	return c;

-}

-static const stbi_uc stbi__depth_scale_table[9] = {0, 0xff, 0x55, 0, 0x11, 0, 0, 0, 0x01};

-// create the png data from post-deflated data

-static int stbi__create_png_image_raw(stbi__png* a, stbi_uc* raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color) {

-	int bytes = (depth == 16 ? 2 : 1);

-	stbi__context* s = a->s;

-	stbi__uint32 i, j, stride = x * out_n * bytes;

-	stbi__uint32 img_len, img_width_bytes;

-	int k;

-	int img_n = s->img_n; // copy it into a local for later

-	int output_bytes = out_n * bytes;

-	int filter_bytes = img_n * bytes;

-	int width = x;

-	STBI_ASSERT(out_n == s->img_n || out_n == s->img_n + 1);

-	a->out = (stbi_uc*)stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into

-	if (!a->out)

-		return stbi__err("outofmem", "Out of memory");

-	if (!stbi__mad3sizes_valid(img_n, x, depth, 7))

-		return stbi__err("too large", "Corrupt PNG");

-	img_width_bytes = (((img_n * x * depth) + 7) >> 3);

-	img_len = (img_width_bytes + 1) * y;

-	// we used to check for exact match between raw_len and img_len on

-	// non-interlaced PNGs, but issue #276 reported a PNG in the wild that had

-	// extra data at the end (all zeros), so just check for raw_len < img_len

-	// always.

-	if (raw_len < img_len)

-		return stbi__err("not enough pixels", "Corrupt PNG");

-	for (j = 0; j < y; ++j) {

-		stbi_uc* cur = a->out + stride * j;

-		stbi_uc* prior;

-		int filter = *raw++;

-		if (filter > 4)

-			return stbi__err("invalid filter", "Corrupt PNG");

-		if (depth < 8) {

-			STBI_ASSERT(img_width_bytes <= x);

-			cur += x * out_n - img_width_bytes; // store output to the rightmost img_len

-												// bytes, so we can decode in place

-			filter_bytes = 1;

-			width = img_width_bytes;

-		}

-		prior = cur - stride; // bugfix: need to compute this after 'cur +='

-							  // computation above

-		// if first row, use special filter that doesn't sample previous row

-		if (j == 0)

-			filter = first_row_filter[filter];

-		// handle first byte explicitly

-		for (k = 0; k < filter_bytes; ++k) {

-			switch (filter) {

-			case STBI__F_none:

-				cur[k] = raw[k];

-				break;

-			case STBI__F_sub:

-				cur[k] = raw[k];

-				break;

-			case STBI__F_up:

-				cur[k] = STBI__BYTECAST(raw[k] + prior[k]);

-				break;

-			case STBI__F_avg:

-				cur[k] = STBI__BYTECAST(raw[k] + (prior[k] >> 1));

-				break;

-			case STBI__F_paeth:

-				cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0, prior[k], 0));

-				break;

-			case STBI__F_avg_first:

-				cur[k] = raw[k];

-				break;

-			case STBI__F_paeth_first:

-				cur[k] = raw[k];

-				break;

-			}

-		}

-		if (depth == 8) {

-			if (img_n != out_n)

-				cur[img_n] = 255; // first pixel

-			raw += img_n;

-			cur += out_n;

-			prior += out_n;

-		} else if (depth == 16) {

-			if (img_n != out_n) {

-				cur[filter_bytes] = 255;	 // first pixel top byte

-				cur[filter_bytes + 1] = 255; // first pixel bottom byte

-			}

-			raw += filter_bytes;

-			cur += output_bytes;

-			prior += output_bytes;

-		} else {

-			raw += 1;

-			cur += 1;

-			prior += 1;

-		}

-		// this is a little gross, so that we don't switch per-pixel or

-		// per-component

-		if (depth < 8 || img_n == out_n) {

-			int nk = (width - 1) * filter_bytes;

-#define STBI__CASE(f)                                                                                                                                          \

-	case f:                                                                                                                                                    \

-		for (k = 0; k < nk; ++k)

-			switch (filter) {

-			// "none" filter turns into a memcpy here; make that explicit.

-			case STBI__F_none:

-				memcpy(cur, raw, nk);

-				break;

-				STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k - filter_bytes]); }

-				break;

-				STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); }

-				break;

-				STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k - filter_bytes]) >> 1)); }

-				break;

-				STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - filter_bytes], prior[k], prior[k - filter_bytes])); }

-				break;

-				STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k - filter_bytes] >> 1)); }

-				break;

-				STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - filter_bytes], 0, 0)); }

-				break;

-			}

-#undef STBI__CASE

-			raw += nk;

-		} else {

-			STBI_ASSERT(img_n + 1 == out_n);

-#define STBI__CASE(f)                                                                                                                                          \

-	case f:                                                                                                                                                    \

-		for (i = x - 1; i >= 1; --i, cur[filter_bytes] = 255, raw += filter_bytes, cur += output_bytes, prior += output_bytes)                                 \

-			for (k = 0; k < filter_bytes; ++k)

-			switch (filter) {

-				STBI__CASE(STBI__F_none) { cur[k] = raw[k]; }

-				break;

-				STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k - output_bytes]); }

-				break;

-				STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); }

-				break;

-				STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k - output_bytes]) >> 1)); }

-				break;

-				STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - output_bytes], prior[k], prior[k - output_bytes])); }

-				break;

-				STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k - output_bytes] >> 1)); }

-				break;

-				STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - output_bytes], 0, 0)); }

-				break;

-			}

-#undef STBI__CASE

-			// the loop above sets the high byte of the pixels' alpha, but for

-			// 16 bit png files we also need the low byte set. we'll do that

-			// here.

-			if (depth == 16) {

-				cur = a->out + stride * j; // start at the beginning of the row again

-				for (i = 0; i < x; ++i, cur += output_bytes) {

-					cur[filter_bytes + 1] = 255;

-				}

-			}

-		}

-	}

-	// we make a separate pass to expand bits to pixels; for performance,

-	// this could run two scanlines behind the above code, so it won't

-	// intefere with filtering but will still be in the cache.

-	if (depth < 8) {

-		for (j = 0; j < y; ++j) {

-			stbi_uc* cur = a->out + stride * j;

-			stbi_uc* in = a->out + stride * j + x * out_n - img_width_bytes;

-			// unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the

-			// common 8-bit path optimal at minimal cost for 1/2/4-bit png

-			// guarante byte alignment, if width is not multiple of 8/4/2 we'll

-			// decode dummy trailing data that will be skipped in the later loop

-			stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range

-			// note that the final byte might overshoot and write more data than

-			// desired. we can allocate enough data that this never writes out

-			// of memory, but it could also overwrite the next scanline. can it

-			// overwrite non-empty data on the next scanline? yes, consider

-			// 1-pixel-wide scanlines with 1-bit-per-pixel. so we need to

-			// explicitly clamp the final ones

-			if (depth == 4) {

-				for (k = x * img_n; k >= 2; k -= 2, ++in) {

-					*cur++ = scale * ((*in >> 4));

-					*cur++ = scale * ((*in) & 0x0f);

-				}

-				if (k > 0)

-					*cur++ = scale * ((*in >> 4));

-			} else if (depth == 2) {

-				for (k = x * img_n; k >= 4; k -= 4, ++in) {

-					*cur++ = scale * ((*in >> 6));

-					*cur++ = scale * ((*in >> 4) & 0x03);

-					*cur++ = scale * ((*in >> 2) & 0x03);

-					*cur++ = scale * ((*in) & 0x03);

-				}

-				if (k > 0)

-					*cur++ = scale * ((*in >> 6));

-				if (k > 1)

-					*cur++ = scale * ((*in >> 4) & 0x03);

-				if (k > 2)

-					*cur++ = scale * ((*in >> 2) & 0x03);

-			} else if (depth == 1) {

-				for (k = x * img_n; k >= 8; k -= 8, ++in) {

-					*cur++ = scale * ((*in >> 7));

-					*cur++ = scale * ((*in >> 6) & 0x01);

-					*cur++ = scale * ((*in >> 5) & 0x01);

-					*cur++ = scale * ((*in >> 4) & 0x01);

-					*cur++ = scale * ((*in >> 3) & 0x01);

-					*cur++ = scale * ((*in >> 2) & 0x01);

-					*cur++ = scale * ((*in >> 1) & 0x01);

-					*cur++ = scale * ((*in) & 0x01);

-				}

-				if (k > 0)

-					*cur++ = scale * ((*in >> 7));

-				if (k > 1)

-					*cur++ = scale * ((*in >> 6) & 0x01);

-				if (k > 2)

-					*cur++ = scale * ((*in >> 5) & 0x01);

-				if (k > 3)

-					*cur++ = scale * ((*in >> 4) & 0x01);

-				if (k > 4)

-					*cur++ = scale * ((*in >> 3) & 0x01);

-				if (k > 5)

-					*cur++ = scale * ((*in >> 2) & 0x01);

-				if (k > 6)

-					*cur++ = scale * ((*in >> 1) & 0x01);

-			}

-			if (img_n != out_n) {

-				int q;

-				// insert alpha = 255

-				cur = a->out + stride * j;

-				if (img_n == 1) {

-					for (q = x - 1; q >= 0; --q) {

-						cur[q * 2 + 1] = 255;

-						cur[q * 2 + 0] = cur[q];

-					}

-				} else {

-					STBI_ASSERT(img_n == 3);

-					for (q = x - 1; q >= 0; --q) {

-						cur[q * 4 + 3] = 255;

-						cur[q * 4 + 2] = cur[q * 3 + 2];

-						cur[q * 4 + 1] = cur[q * 3 + 1];

-						cur[q * 4 + 0] = cur[q * 3 + 0];

-					}

-				}

-			}

-		}

-	} else if (depth == 16) {

-		// force the image data from big-endian to platform-native.

-		// this is done in a separate pass due to the decoding relying

-		// on the data being untouched, but could probably be done

-		// per-line during decode if care is taken.

-		stbi_uc* cur = a->out;

-		stbi__uint16* cur16 = (stbi__uint16*)cur;

-		for (i = 0; i < x * y * out_n; ++i, cur16++, cur += 2) {

-			*cur16 = (cur[0] << 8) | cur[1];

-		}

-	}

-	return 1;

-}

-static int stbi__create_png_image(stbi__png* a, stbi_uc* image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced) {

-	int bytes = (depth == 16 ? 2 : 1);

-	int out_bytes = out_n * bytes;

-	stbi_uc* final;

-	int p;

-	if (!interlaced)

-		return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);

-	// de-interlacing

-	final = (stbi_uc*)stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);

-	for (p = 0; p < 7; ++p) {

-		int xorig[] = {0, 4, 0, 2, 0, 1, 0};

-		int yorig[] = {0, 0, 4, 0, 2, 0, 1};

-		int xspc[] = {8, 8, 4, 4, 2, 2, 1};

-		int yspc[] = {8, 8, 8, 4, 4, 2, 2};

-		int i, j, x, y;

-		// pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1

-		x = (a->s->img_x - xorig[p] + xspc[p] - 1) / xspc[p];

-		y = (a->s->img_y - yorig[p] + yspc[p] - 1) / yspc[p];

-		if (x && y) {

-			stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;

-			if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {

-				STBI_FREE(final);

-				return 0;

-			}

-			for (j = 0; j < y; ++j) {

-				for (i = 0; i < x; ++i) {

-					int out_y = j * yspc[p] + yorig[p];

-					int out_x = i * xspc[p] + xorig[p];

-					memcpy(final + out_y * a->s->img_x * out_bytes + out_x * out_bytes, a->out + (j * x + i) * out_bytes, out_bytes);

-				}

-			}

-			STBI_FREE(a->out);

-			image_data += img_len;

-			image_data_len -= img_len;

-		}

-	}

-	a->out = final;

-	return 1;

-}

-static int stbi__compute_transparency(stbi__png* z, stbi_uc tc[3], int out_n) {

-	stbi__context* s = z->s;

-	stbi__uint32 i, pixel_count = s->img_x * s->img_y;

-	stbi_uc* p = z->out;

-	// compute color-based transparency, assuming we've

-	// already got 255 as the alpha value in the output

-	STBI_ASSERT(out_n == 2 || out_n == 4);

-	if (out_n == 2) {

-		for (i = 0; i < pixel_count; ++i) {

-			p[1] = (p[0] == tc[0] ? 0 : 255);

-			p += 2;

-		}

-	} else {

-		for (i = 0; i < pixel_count; ++i) {

-			if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])

-				p[3] = 0;

-			p += 4;

-		}

-	}

-	return 1;

-}

-static int stbi__compute_transparency16(stbi__png* z, stbi__uint16 tc[3], int out_n) {

-	stbi__context* s = z->s;

-	stbi__uint32 i, pixel_count = s->img_x * s->img_y;

-	stbi__uint16* p = (stbi__uint16*)z->out;

-	// compute color-based transparency, assuming we've

-	// already got 65535 as the alpha value in the output

-	STBI_ASSERT(out_n == 2 || out_n == 4);

-	if (out_n == 2) {

-		for (i = 0; i < pixel_count; ++i) {

-			p[1] = (p[0] == tc[0] ? 0 : 65535);

-			p += 2;

-		}

-	} else {

-		for (i = 0; i < pixel_count; ++i) {

-			if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])

-				p[3] = 0;

-			p += 4;

-		}

-	}

-	return 1;

-}

-static int stbi__expand_png_palette(stbi__png* a, stbi_uc* palette, int len, int pal_img_n) {

-	stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;

-	stbi_uc *p, *temp_out, *orig = a->out;

-	p = (stbi_uc*)stbi__malloc_mad2(pixel_count, pal_img_n, 0);

-	if (p == NULL)

-		return stbi__err("outofmem", "Out of memory");

-	// between here and free(out) below, exitting would leak

-	temp_out = p;

-	if (pal_img_n == 3) {

-		for (i = 0; i < pixel_count; ++i) {

-			int n = orig[i] * 4;

-			p[0] = palette[n];

-			p[1] = palette[n + 1];

-			p[2] = palette[n + 2];

-			p += 3;

-		}

-	} else {

-		for (i = 0; i < pixel_count; ++i) {

-			int n = orig[i] * 4;

-			p[0] = palette[n];

-			p[1] = palette[n + 1];

-			p[2] = palette[n + 2];

-			p[3] = palette[n + 3];

-			p += 4;

-		}

-	}

-	STBI_FREE(a->out);

-	a->out = temp_out;

-	STBI_NOTUSED(len);

-	return 1;

-}

-static int stbi__unpremultiply_on_load = 0;

-static int stbi__de_iphone_flag = 0;

-STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply) { stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply; }

-STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert) { stbi__de_iphone_flag = flag_true_if_should_convert; }

-static void stbi__de_iphone(stbi__png* z) {

-	stbi__context* s = z->s;

-	stbi__uint32 i, pixel_count = s->img_x * s->img_y;

-	stbi_uc* p = z->out;

-	if (s->img_out_n == 3) { // convert bgr to rgb

-		for (i = 0; i < pixel_count; ++i) {

-			stbi_uc t = p[0];

-			p[0] = p[2];

-			p[2] = t;

-			p += 3;

-		}

-	} else {

-		STBI_ASSERT(s->img_out_n == 4);

-		if (stbi__unpremultiply_on_load) {

-			// convert bgr to rgb and unpremultiply

-			for (i = 0; i < pixel_count; ++i) {

-				stbi_uc a = p[3];

-				stbi_uc t = p[0];

-				if (a) {

-					stbi_uc half = a / 2;

-					p[0] = (p[2] * 255 + half) / a;

-					p[1] = (p[1] * 255 + half) / a;

-					p[2] = (t * 255 + half) / a;

-				} else {

-					p[0] = p[2];

-					p[2] = t;

-				}

-				p += 4;

-			}

-		} else {

-			// convert bgr to rgb

-			for (i = 0; i < pixel_count; ++i) {

-				stbi_uc t = p[0];

-				p[0] = p[2];

-				p[2] = t;

-				p += 4;

-			}

-		}

-	}

-}

-#define STBI__PNG_TYPE(a, b, c, d) (((unsigned)(a) << 24) + ((unsigned)(b) << 16) + ((unsigned)(c) << 8) + (unsigned)(d))

-// Walk the chunk stream of z->s and act on each chunk according to `scan`:
-//   STBI__SCAN_type   - return 1 as soon as the PNG signature has been checked;
-//   STBI__SCAN_header - return 1 once the dimensions and component count are
-//                       known (at IHDR for non-paletted images, otherwise at
-//                       tRNS / IDAT / IEND);
-//   STBI__SCAN_load   - accumulate every IDAT payload and, at IEND, inflate and
-//                       decode the image.
-// req_comp is consulted only at IEND, to pick the output component count.
-// Returns 1 on success and 0 on failure. The error paths in this function do
-// not release z->idata / z->expanded; the caller is expected to do so.
-static int stbi__parse_png_file(stbi__png* z, int scan, int req_comp) {
-	// palette stores 4 bytes (R, G, B, A) per entry. pal_img_n is 0 for
-	// non-paletted images, 3 for paletted ones and 4 once a tRNS chunk is seen.
-	stbi_uc palette[1024], pal_img_n = 0;
-	// tc / tc16 hold the transparent colour key for 8-bit / 16-bit images.
-	stbi_uc has_trans = 0, tc[3] = {0};
-	stbi__uint16 tc16[3];
-	// ioff: bytes of IDAT data collected so far; idata_limit: capacity of z->idata.
-	stbi__uint32 ioff = 0, idata_limit = 0, i, pal_len = 0;
-	int first = 1, k, interlace = 0, color = 0, is_iphone = 0;
-	stbi__context* s = z->s;
-	z->expanded = NULL;
-	z->idata = NULL;
-	z->out = NULL;
-	if (!stbi__check_png_header(s))
-		return 0;
-	if (scan == STBI__SCAN_type)
-		return 1;
-	for (;;) {
-		stbi__pngchunk c = stbi__get_chunk_header(s);
-		switch (c.type) {
-		case STBI__PNG_TYPE('C', 'g', 'B', 'I'):
-			// Remember the marker; it changes how the data is inflated and
-			// post-processed at IEND.
-			is_iphone = 1;
-			stbi__skip(s, c.length);
-			break;
-		case STBI__PNG_TYPE('I', 'H', 'D', 'R'): {
-			int comp, filter;
-			if (!first)
-				return stbi__err("multiple IHDR", "Corrupt PNG");
-			first = 0;
-			if (c.length != 13)
-				return stbi__err("bad IHDR len", "Corrupt PNG");
-			s->img_x = stbi__get32be(s);
-			if (s->img_x > (1 << 24))
-				return stbi__err("too large", "Very large image (corrupt?)");
-			s->img_y = stbi__get32be(s);
-			if (s->img_y > (1 << 24))
-				return stbi__err("too large", "Very large image (corrupt?)");
-			z->depth = stbi__get8(s);
-			if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)
-				return stbi__err("1/2/4/8/16-bit only", "PNG not supported: 1/2/4/8/16-bit only");
-			color = stbi__get8(s);
-			if (color > 6)
-				return stbi__err("bad ctype", "Corrupt PNG");
-			if (color == 3 && z->depth == 16)
-				return stbi__err("bad ctype", "Corrupt PNG");
-			// Colour type 3 is the paletted case; any other odd value is rejected.
-			if (color == 3)
-				pal_img_n = 3;
-			else if (color & 1)
-				return stbi__err("bad ctype", "Corrupt PNG");
-			comp = stbi__get8(s);
-			if (comp)
-				return stbi__err("bad comp method", "Corrupt PNG");
-			filter = stbi__get8(s);
-			if (filter)
-				return stbi__err("bad filter method", "Corrupt PNG");
-			interlace = stbi__get8(s);
-			if (interlace > 1)
-				return stbi__err("bad interlace method", "Corrupt PNG");
-			if (!s->img_x || !s->img_y)
-				return stbi__err("0-pixel image", "Corrupt PNG");
-			if (!pal_img_n) {
-				// Bit 1 of the colour type selects RGB over grey, bit 2 adds alpha.
-				s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
-				// Division-based guard: keeps img_x * img_y * img_n within 1 << 30.
-				if ((1 << 30) / s->img_x / s->img_n < s->img_y)
-					return stbi__err("too large", "Image too large to decode");
-				if (scan == STBI__SCAN_header)
-					return 1;
-			} else {
-				// if paletted, then pal_n is our final components, and
-				// img_n is # components to decompress/filter.
-				s->img_n = 1;
-				// Same guard as above, sized for up to 4 output components.
-				if ((1 << 30) / s->img_x / 4 < s->img_y)
-					return stbi__err("too large", "Corrupt PNG");
-				// if SCAN_header, have to scan to see if we have a tRNS
-			}
-			break;
-		}
-		case STBI__PNG_TYPE('P', 'L', 'T', 'E'): {
-			if (first)
-				return stbi__err("first not IHDR", "Corrupt PNG");
-			if (c.length > 256 * 3)
-				return stbi__err("invalid PLTE", "Corrupt PNG");
-			// The payload must be a whole number of 3-byte entries.
-			pal_len = c.length / 3;
-			if (pal_len * 3 != c.length)
-				return stbi__err("invalid PLTE", "Corrupt PNG");
-			for (i = 0; i < pal_len; ++i) {
-				palette[i * 4 + 0] = stbi__get8(s);
-				palette[i * 4 + 1] = stbi__get8(s);
-				palette[i * 4 + 2] = stbi__get8(s);
-				palette[i * 4 + 3] = 255; // opaque unless a tRNS chunk overrides it
-			}
-			break;
-		}
-		case STBI__PNG_TYPE('t', 'R', 'N', 'S'): {
-			if (first)
-				return stbi__err("first not IHDR", "Corrupt PNG");
-			if (z->idata)
-				return stbi__err("tRNS after IDAT", "Corrupt PNG");
-			if (pal_img_n) {
-				// Paletted image: the chunk supplies per-entry alpha values.
-				if (scan == STBI__SCAN_header) {
-					s->img_n = 4;
-					return 1;
-				}
-				if (pal_len == 0)
-					return stbi__err("tRNS before PLTE", "Corrupt PNG");
-				if (c.length > pal_len)
-					return stbi__err("bad tRNS len", "Corrupt PNG");
-				pal_img_n = 4;
-				for (i = 0; i < c.length; ++i)
-					palette[i * 4 + 3] = stbi__get8(s);
-			} else {
-				// Non-paletted image: the chunk supplies one 16-bit colour key
-				// per component; only odd component counts (no alpha) qualify.
-				if (!(s->img_n & 1))
-					return stbi__err("tRNS with alpha", "Corrupt PNG");
-				if (c.length != (stbi__uint32)s->img_n * 2)
-					return stbi__err("bad tRNS len", "Corrupt PNG");
-				has_trans = 1;
-				if (z->depth == 16) {
-					for (k = 0; k < s->img_n; ++k)
-						tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
-				} else {
-					// Keep the low byte and scale it by the per-depth factor:
-					// non 8-bit images will be larger.
-					for (k = 0; k < s->img_n; ++k)
-						tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth];
-				}
-			}
-			break;
-		}
-		case STBI__PNG_TYPE('I', 'D', 'A', 'T'): {
-			if (first)
-				return stbi__err("first not IHDR", "Corrupt PNG");
-			if (pal_img_n && !pal_len)
-				return stbi__err("no PLTE", "Corrupt PNG");
-			if (scan == STBI__SCAN_header) {
-				s->img_n = pal_img_n;
-				return 1;
-			}
-			// Reject a running total that would wrap when viewed as an int.
-			if ((int)(ioff + c.length) < (int)ioff)
-				return 0;
-			if (ioff + c.length > idata_limit) {
-				// Grow z->idata: start at max(chunk length, 4096), then double
-				// until the new chunk fits.
-				stbi__uint32 idata_limit_old = idata_limit;
-				stbi_uc* p;
-				if (idata_limit == 0)
-					idata_limit = c.length > 4096 ? c.length : 4096;
-				while (ioff + c.length > idata_limit)
-					idata_limit *= 2;
-				STBI_NOTUSED(idata_limit_old);
-				p = (stbi_uc*)STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit);
-				if (p == NULL)
-					return stbi__err("outofmem", "Out of memory");
-				z->idata = p;
-			}
-			if (!stbi__getn(s, z->idata + ioff, c.length))
-				return stbi__err("outofdata", "Corrupt PNG");
-			ioff += c.length;
-			break;
-		}
-		case STBI__PNG_TYPE('I', 'E', 'N', 'D'): {
-			stbi__uint32 raw_len, bpl;
-			if (first)
-				return stbi__err("first not IHDR", "Corrupt PNG");
-			if (scan != STBI__SCAN_load)
-				return 1;
-			if (z->idata == NULL)
-				return stbi__err("no IDAT", "Corrupt PNG");
-			// initial guess for decoded data size to avoid unnecessary reallocs
-			bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
-			raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
-			// raw_len is passed both as the size guess and as the out-parameter
-			// that receives the decoded length.
-			z->expanded = (stbi_uc*)stbi_zlib_decode_malloc_guesssize_headerflag((char*)z->idata, ioff, raw_len, (int*)&raw_len, !is_iphone);
-			if (z->expanded == NULL)
-				return 0; // zlib should set error
-			STBI_FREE(z->idata);
-			z->idata = NULL;
-			// Decode with one extra component when the caller asked for exactly
-			// one more than the file has (non-paletted, and not 3), or when a
-			// colour key is present.
-			if ((req_comp == s->img_n + 1 && req_comp != 3 && !pal_img_n) || has_trans)
-				s->img_out_n = s->img_n + 1;
-			else
-				s->img_out_n = s->img_n;
-			if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace))
-				return 0;
-			if (has_trans) {
-				if (z->depth == 16) {
-					if (!stbi__compute_transparency16(z, tc16, s->img_out_n))
-						return 0;
-				} else {
-					if (!stbi__compute_transparency(z, tc, s->img_out_n))
-						return 0;
-				}
-			}
-			if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
-				stbi__de_iphone(z);
-			if (pal_img_n) {
-				// pal_img_n == 3 or 4
-				s->img_n = pal_img_n; // record the actual colors we had
-				s->img_out_n = pal_img_n;
-				if (req_comp >= 3)
-					s->img_out_n = req_comp;
-				if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
-					return 0;
-			} else if (has_trans) {
-				// non-paletted image with tRNS -> source image has (constant)
-				// alpha
-				++s->img_n;
-			}
-			STBI_FREE(z->expanded);
-			z->expanded = NULL;
-			// end of PNG chunk, read and skip CRC
-			stbi__get32be(s);
-			return 1;
-		}
-		default:
-			// if critical, fail
-			if (first)
-				return stbi__err("first not IHDR", "Corrupt PNG");
-			// Bit 29 of the packed tag is bit 5 of the first name character;
-			// when it is clear the chunk is treated as critical and rejected.
-			if ((c.type & (1 << 29)) == 0) {
-#ifndef STBI_NO_FAILURE_STRINGS
-				// not threadsafe
-				static char invalid_chunk[] = "XXXX PNG chunk not known";
-				invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
-				invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
-				invalid_chunk[2] = STBI__BYTECAST(c.type >> 8);
-				invalid_chunk[3] = STBI__BYTECAST(c.type >> 0);
-#endif
-				return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
-			}
-			stbi__skip(s, c.length);
-			break;
-		}
-		// end of PNG chunk, read and skip CRC
-		stbi__get32be(s);
-	}
-}

-// Decode a whole PNG through `p` and hand back the pixel buffer.
-// On success *x / *y receive the image size and *n (when non-NULL) the
-// component count recorded for the file; ri->bits_per_channel is set to 8 for
-// depths below 8 and to the file depth otherwise. When req_comp is non-zero and
-// differs from the decoded component count, the buffer is converted first.
-// Returns NULL on failure. Any buffers still attached to `p` are released
-// before the normal return.
-static void* stbi__do_png(stbi__png* p, int* x, int* y, int* n, int req_comp, stbi__result_info* ri) {
-	void* pixels = NULL;
-	if (!(req_comp >= 0 && req_comp <= 4))
-		return stbi__errpuc("bad req_comp", "Internal error");
-	if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
-		stbi__context* ctx = p->s;
-		ri->bits_per_channel = (p->depth < 8) ? 8 : p->depth;
-		// Take ownership of the decoded image away from the parser state.
-		pixels = p->out;
-		p->out = NULL;
-		if (req_comp != 0 && ctx->img_out_n != req_comp) {
-			if (ri->bits_per_channel == 8) {
-				pixels = stbi__convert_format((unsigned char*)pixels, ctx->img_out_n, req_comp, ctx->img_x, ctx->img_y);
-			} else {
-				pixels = stbi__convert_format16((stbi__uint16*)pixels, ctx->img_out_n, req_comp, ctx->img_x, ctx->img_y);
-			}
-			ctx->img_out_n = req_comp;
-			if (!pixels)
-				return pixels;
-		}
-		*x = ctx->img_x;
-		*y = ctx->img_y;
-		if (n != NULL)
-			*n = ctx->img_n;
-	}
-	// Drop whatever the parser may have left behind.
-	STBI_FREE(p->out);
-	p->out = NULL;
-	STBI_FREE(p->expanded);
-	p->expanded = NULL;
-	STBI_FREE(p->idata);
-	p->idata = NULL;
-	return pixels;
-}

-// Loader entry point for PNG: bind a fresh parser state to `s` and decode.
-static void* stbi__png_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri) {
-	stbi__png png_state;
-	png_state.s = s;
-	return stbi__do_png(&png_state, x, y, comp, req_comp, ri);
-}

-// Report whether the stream starts with a PNG header, then rewind the stream
-// regardless of the outcome.
-static int stbi__png_test(stbi__context* s) {
-	int is_png = stbi__check_png_header(s);
-	stbi__rewind(s);
-	return is_png;
-}

-// Run a header-only scan through `p` and copy out the size / component count.
-// Each of x, y, comp may be NULL. On failure the stream is rewound and 0 is
-// returned; on success the stream is left where the scan stopped.
-static int stbi__png_info_raw(stbi__png* p, int* x, int* y, int* comp) {
-	stbi__context* ctx;
-	if (stbi__parse_png_file(p, STBI__SCAN_header, 0) == 0) {
-		stbi__rewind(p->s);
-		return 0;
-	}
-	ctx = p->s;
-	if (x != NULL)
-		*x = ctx->img_x;
-	if (y != NULL)
-		*y = ctx->img_y;
-	if (comp != NULL)
-		*comp = ctx->img_n;
-	return 1;
-}

-// Info entry point for PNG: bind a fresh parser state to `s` and query it.
-static int stbi__png_info(stbi__context* s, int* x, int* y, int* comp) {
-	stbi__png png_state;
-	png_state.s = s;
-	return stbi__png_info_raw(&png_state, x, y, comp);
-}

-// Return 1 when the stream is a PNG whose header declares a depth of 16.
-// The stream is rewound here only in the "valid PNG, other depth" case; a
-// failed header scan is rewound by stbi__png_info_raw itself.
-static int stbi__png_is16(stbi__context* s) {
-	stbi__png probe;
-	probe.s = s;
-	if (!stbi__png_info_raw(&probe, NULL, NULL, NULL))
-		return 0;
-	if (probe.depth == 16)
-		return 1;
-	stbi__rewind(probe.s);
-	return 0;
-}

-#endif

-// Microsoft/Windows BMP image

-#ifndef STBI_NO_BMP

-// Check for the 'B' 'M' signature followed by a header-size field holding one
-// of the supported values. Does not rewind the stream.
-static int stbi__bmp_test_raw(stbi__context* s) {
-	int header_size;
-	if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M')
-		return 0;
-	stbi__get32le(s); // discard filesize
-	stbi__get16le(s); // discard reserved
-	stbi__get16le(s); // discard reserved
-	stbi__get32le(s); // discard data offset
-	header_size = stbi__get32le(s);
-	switch (header_size) {
-	case 12:
-	case 40:
-	case 56:
-	case 108:
-	case 124:
-		return 1;
-	default:
-		return 0;
-	}
-}

-// Probe for a BMP header and rewind the stream whatever the result.
-static int stbi__bmp_test(stbi__context* s) {
-	int is_bmp;
-	is_bmp = stbi__bmp_test_raw(s);
-	stbi__rewind(s);
-	return is_bmp;
-}

-// Index (0..31) of the most significant set bit of z, or -1 when z is zero.
-static int stbi__high_bit(unsigned int z) {
-	// Binary search: at each step, if anything lives above the lower `step`
-	// bits, account for those bits and drop them.
-	static const int steps[5] = {16, 8, 4, 2, 1};
-	int pos = 0;
-	int k;
-	if (!z)
-		return -1;
-	for (k = 0; k < 5; ++k) {
-		if ((z >> steps[k]) != 0) {
-			pos += steps[k];
-			z >>= steps[k];
-		}
-	}
-	return pos;
-}

-// Number of set bits in a.
-static int stbi__bitcount(unsigned int a) {
-	int ones = 0;
-	// Each pass clears the lowest set bit, so the loop runs once per set bit.
-	while (a != 0) {
-		a &= a - 1;
-		++ones;
-	}
-	return ones;
-}

-// Extract an arbitrarily-aligned N-bit value (N = bits) from v, then make it
-// 8 bits long and fractionally extend it to the full 0..255 range.
-//   v     - the already-masked channel value
-//   shift - right shift that moves the channel's top bit to bit 7
-//           (negative means shift left instead)
-//   bits  - number of significant bits in the channel, 0..8
-// The bits range is asserted before bits is used, because it serves both as a
-// shift amount and as an index into the 9-entry tables below.
-static int stbi__shiftsigned(unsigned int v, int shift, int bits) {
-	// mul_table[n] replicates an n-bit pattern across (at least) 8 bits;
-	// shift_table[n] drops the surplus low bits produced by the multiply.
-	static const unsigned int mul_table[9] = {
-		0,
-		0xff /*0b11111111*/,
-		0x55 /*0b01010101*/,
-		0x49 /*0b01001001*/,
-		0x11 /*0b00010001*/,
-		0x21 /*0b00100001*/,
-		0x41 /*0b01000001*/,
-		0x81 /*0b10000001*/,
-		0x01 /*0b00000001*/,
-	};
-	static const unsigned int shift_table[9] = {
-		0, 0, 0, 1, 0, 2, 4, 6, 0,
-	};
-	STBI_ASSERT(bits >= 0 && bits <= 8);
-	if (shift < 0)
-		v <<= -shift;
-	else
-		v >>= shift;
-	STBI_ASSERT(v < 256);
-	// Keep only the top `bits` bits of the 8-bit value.
-	v >>= (8 - bits);
-	return (int)((unsigned)v * mul_table[bits]) >> shift_table[bits];
-}

-// Values gathered by stbi__bmp_parse_header for use by the BMP pixel decoder.
-typedef struct {
-	// bpp: bits-per-pixel field; offset: 32-bit offset field from the file
-	// header; hsz: info-header size field (12, 40, 56, 108 or 124).
-	int bpp, offset, hsz;
-	// mr/mg/mb/ma: red, green, blue and alpha channel bit masks (0 when unset).
-	// all_a: accumulates the OR of every decoded alpha value.
-	unsigned int mr, mg, mb, ma, all_a;
-	// extra_read: starts at 14 (the bytes consumed before the header-size
-	// field) and grows by 12 when three extra mask words are read.
-	int extra_read;
-} stbi__bmp_data;

-// Read the BMP file header and info header from `s`, filling s->img_x,
-// s->img_y and the fields of `info`. info->all_a is written only in the
-// uncompressed 32-bpp branch, so the caller initialises it beforehand.
-// Returns a non-NULL dummy pointer on success; on failure returns the result
-// of stbi__errpuc, which the caller compares against NULL.
-static void* stbi__bmp_parse_header(stbi__context* s, stbi__bmp_data* info) {
-	int hsz;
-	if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M')
-		return stbi__errpuc("not BMP", "Corrupt BMP");
-	stbi__get32le(s); // discard filesize
-	stbi__get16le(s); // discard reserved
-	stbi__get16le(s); // discard reserved
-	info->offset = stbi__get32le(s);
-	info->hsz = hsz = stbi__get32le(s);
-	info->mr = info->mg = info->mb = info->ma = 0;
-	// 14 bytes were consumed before the header-size field: 2 + 4 + 2 + 2 + 4.
-	info->extra_read = 14;
-	if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124)
-		return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
-	// The 12-byte header stores 16-bit dimensions; all larger ones use 32 bits.
-	if (hsz == 12) {
-		s->img_x = stbi__get16le(s);
-		s->img_y = stbi__get16le(s);
-	} else {
-		s->img_x = stbi__get32le(s);
-		s->img_y = stbi__get32le(s);
-	}
-	// The next 16-bit field must be exactly 1.
-	if (stbi__get16le(s) != 1)
-		return stbi__errpuc("bad BMP", "bad BMP");
-	info->bpp = stbi__get16le(s);
-	if (hsz != 12) {
-		int compress = stbi__get32le(s);
-		if (compress == 1 || compress == 2)
-			return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
-		stbi__get32le(s); // discard sizeof
-		stbi__get32le(s); // discard hres
-		stbi__get32le(s); // discard vres
-		stbi__get32le(s); // discard colorsused
-		stbi__get32le(s); // discard max important
-		if (hsz == 40 || hsz == 56) {
-			if (hsz == 56) {
-				// The 56-byte header carries four extra words that are skipped.
-				stbi__get32le(s);
-				stbi__get32le(s);
-				stbi__get32le(s);
-				stbi__get32le(s);
-			}
-			if (info->bpp == 16 || info->bpp == 32) {
-				if (compress == 0) {
-					// No masks in the file: fall back to fixed layouts.
-					if (info->bpp == 32) {
-						info->mr = 0xffu << 16;
-						info->mg = 0xffu << 8;
-						info->mb = 0xffu << 0;
-						info->ma = 0xffu << 24;
-						info->all_a = 0; // if all_a is 0 at end, then we loaded
-										 // alpha channel but it was all 0
-					} else {
-						// 16 bpp: three 5-bit channels.
-						info->mr = 31u << 10;
-						info->mg = 31u << 5;
-						info->mb = 31u << 0;
-					}
-				} else if (compress == 3) {
-					// Compression value 3: three mask words follow the header.
-					info->mr = stbi__get32le(s);
-					info->mg = stbi__get32le(s);
-					info->mb = stbi__get32le(s);
-					info->extra_read += 12;
-					// not documented, but generated by photoshop and handled by
-					// mspaint
-					if (info->mr == info->mg && info->mg == info->mb) {
-						// ?!?!?
-						return stbi__errpuc("bad BMP", "bad BMP");
-					}
-				} else
-					return stbi__errpuc("bad BMP", "bad BMP");
-			}
-		} else {
-			int i;
-			if (hsz != 108 && hsz != 124)
-				return stbi__errpuc("bad BMP", "bad BMP");
-			// The 108- and 124-byte headers always carry four masks.
-			info->mr = stbi__get32le(s);
-			info->mg = stbi__get32le(s);
-			info->mb = stbi__get32le(s);
-			info->ma = stbi__get32le(s);
-			stbi__get32le(s); // discard color space
-			for (i = 0; i < 12; ++i)
-				stbi__get32le(s); // discard color space parameters
-			if (hsz == 124) {
-				stbi__get32le(s); // discard rendering intent
-				stbi__get32le(s); // discard offset of profile data
-				stbi__get32le(s); // discard size of profile data
-				stbi__get32le(s); // discard reserved
-			}
-		}
-	}
-	return (void*)1;
-}

-// Decode a BMP image from `s`.
-//   x, y     - receive the image width and height
-//   comp     - when non-NULL, receives the component count found in the file
-//   req_comp - 0 to keep the file's component count, otherwise the requested
-//              count (3 and 4 are decoded directly, others are post-converted)
-//   ri       - unused
-// Returns the pixel buffer, or NULL on failure. Every failure path after the
-// allocation releases the buffer before returning.
-static void* stbi__bmp_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri) {
-	stbi_uc* out;
-	unsigned int mr = 0, mg = 0, mb = 0, ma = 0, all_a;
-	stbi_uc pal[256][4];
-	int psize = 0, i, j, width;
-	int flip_vertically, pad, target;
-	stbi__bmp_data info;
-	STBI_NOTUSED(ri);
-	// The header parser overwrites all_a only for uncompressed 32-bpp files.
-	info.all_a = 255;
-	if (stbi__bmp_parse_header(s, &info) == NULL)
-		return NULL; // error code already set
-	// A positive stored height triggers the row flip at the end.
-	flip_vertically = ((int)s->img_y) > 0;
-	s->img_y = abs((int)s->img_y);
-	mr = info.mr;
-	mg = info.mg;
-	mb = info.mb;
-	ma = info.ma;
-	all_a = info.all_a;
-	// Palette entry count, derived from the gap between the headers and the
-	// pixel data: 3 bytes per entry for the 12-byte header, 4 otherwise.
-	if (info.hsz == 12) {
-		if (info.bpp < 24)
-			psize = (info.offset - info.extra_read - 24) / 3;
-	} else {
-		if (info.bpp < 16)
-			psize = (info.offset - info.extra_read - info.hsz) >> 2;
-	}
-	if (psize == 0) {
-		STBI_ASSERT(info.offset == (s->img_buffer - s->buffer_start));
-	}
-	if (info.bpp == 24 && ma == 0xff000000)
-		s->img_n = 3;
-	else
-		s->img_n = ma ? 4 : 3;
-	if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
-		target = req_comp;
-	else
-		target = s->img_n; // if they want monochrome, we'll post-convert
-	// sanity-check size
-	if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
-		return stbi__errpuc("too large", "Corrupt BMP");
-	out = (stbi_uc*)stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
-	if (!out)
-		return stbi__errpuc("outofmem", "Out of memory");
-	if (info.bpp < 16) {
-		// Paletted formats (1, 4 or 8 bits per pixel).
-		int z = 0;
-		if (psize == 0 || psize > 256) {
-			STBI_FREE(out);
-			return stbi__errpuc("invalid", "Corrupt BMP");
-		}
-		// Entries are stored blue, green, red; store them as red, green, blue.
-		for (i = 0; i < psize; ++i) {
-			pal[i][2] = stbi__get8(s);
-			pal[i][1] = stbi__get8(s);
-			pal[i][0] = stbi__get8(s);
-			if (info.hsz != 12)
-				stbi__get8(s);
-			pal[i][3] = 255;
-		}
-		stbi__skip(s, info.offset - info.extra_read - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
-		// width = bytes of pixel data per row, before padding.
-		if (info.bpp == 1)
-			width = (s->img_x + 7) >> 3;
-		else if (info.bpp == 4)
-			width = (s->img_x + 1) >> 1;
-		else if (info.bpp == 8)
-			width = s->img_x;
-		else {
-			STBI_FREE(out);
-			return stbi__errpuc("bad bpp", "Corrupt BMP");
-		}
-		// Rows are padded to a multiple of 4 bytes.
-		pad = (-width) & 3;
-		if (info.bpp == 1) {
-			for (j = 0; j < (int)s->img_y; ++j) {
-				int bit_offset = 7, v = stbi__get8(s);
-				for (i = 0; i < (int)s->img_x; ++i) {
-					int color = (v >> bit_offset) & 0x1;
-					out[z++] = pal[color][0];
-					out[z++] = pal[color][1];
-					out[z++] = pal[color][2];
-					if (target == 4)
-						out[z++] = 255;
-					// Stop before fetching a byte past the end of the row.
-					if (i + 1 == (int)s->img_x)
-						break;
-					if ((--bit_offset) < 0) {
-						bit_offset = 7;
-						v = stbi__get8(s);
-					}
-				}
-				stbi__skip(s, pad);
-			}
-		} else {
-			// 4 and 8 bpp share a loop that emits two pixels per iteration.
-			for (j = 0; j < (int)s->img_y; ++j) {
-				for (i = 0; i < (int)s->img_x; i += 2) {
-					int v = stbi__get8(s), v2 = 0;
-					if (info.bpp == 4) {
-						v2 = v & 15;
-						v >>= 4;
-					}
-					out[z++] = pal[v][0];
-					out[z++] = pal[v][1];
-					out[z++] = pal[v][2];
-					if (target == 4)
-						out[z++] = 255;
-					if (i + 1 == (int)s->img_x)
-						break;
-					v = (info.bpp == 8) ? stbi__get8(s) : v2;
-					out[z++] = pal[v][0];
-					out[z++] = pal[v][1];
-					out[z++] = pal[v][2];
-					if (target == 4)
-						out[z++] = 255;
-				}
-				stbi__skip(s, pad);
-			}
-		}
-	} else {
-		// Direct-colour formats (16, 24 or 32 bits per pixel).
-		int rshift = 0, gshift = 0, bshift = 0, ashift = 0, rcount = 0, gcount = 0, bcount = 0, acount = 0;
-		int z = 0;
-		int easy = 0;
-		stbi__skip(s, info.offset - info.extra_read - info.hsz);
-		if (info.bpp == 24)
-			width = 3 * s->img_x;
-		else if (info.bpp == 16)
-			width = 2 * s->img_x;
-		else /* bpp = 32 and pad = 0 */
-			width = 0;
-		pad = (-width) & 3;
-		// easy == 1: plain 24-bit; easy == 2: 32-bit with the standard byte masks.
-		if (info.bpp == 24) {
-			easy = 1;
-		} else if (info.bpp == 32) {
-			if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
-				easy = 2;
-		}
-		if (!easy) {
-			if (!mr || !mg || !mb) {
-				STBI_FREE(out);
-				return stbi__errpuc("bad masks", "Corrupt BMP");
-			}
-			// right shift amt to put high bit in position #7
-			rshift = stbi__high_bit(mr) - 7;
-			rcount = stbi__bitcount(mr);
-			gshift = stbi__high_bit(mg) - 7;
-			gcount = stbi__bitcount(mg);
-			bshift = stbi__high_bit(mb) - 7;
-			bcount = stbi__bitcount(mb);
-			ashift = stbi__high_bit(ma) - 7;
-			acount = stbi__bitcount(ma);
-			// stbi__shiftsigned indexes 9-entry tables with the bit count and
-			// shifts by (8 - count), so a mask with more than 8 set bits must
-			// be rejected here rather than passed through.
-			if (rcount > 8 || gcount > 8 || bcount > 8 || acount > 8) {
-				STBI_FREE(out);
-				return stbi__errpuc("bad masks", "Corrupt BMP");
-			}
-		}
-		for (j = 0; j < (int)s->img_y; ++j) {
-			if (easy) {
-				for (i = 0; i < (int)s->img_x; ++i) {
-					unsigned char a;
-					// Bytes arrive blue, green, red; store red, green, blue.
-					out[z + 2] = stbi__get8(s);
-					out[z + 1] = stbi__get8(s);
-					out[z + 0] = stbi__get8(s);
-					z += 3;
-					a = (easy == 2 ? stbi__get8(s) : 255);
-					all_a |= a;
-					if (target == 4)
-						out[z++] = a;
-				}
-			} else {
-				int bpp = info.bpp;
-				for (i = 0; i < (int)s->img_x; ++i) {
-					stbi__uint32 v = (bpp == 16 ? (stbi__uint32)stbi__get16le(s) : stbi__get32le(s));
-					unsigned int a;
-					out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
-					out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
-					out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
-					a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
-					all_a |= a;
-					if (target == 4)
-						out[z++] = STBI__BYTECAST(a);
-				}
-			}
-			stbi__skip(s, pad);
-		}
-	}
-	// if alpha channel is all 0s, replace with all 255s
-	if (target == 4 && all_a == 0)
-		for (i = 4 * s->img_x * s->img_y - 1; i >= 0; i -= 4)
-			out[i] = 255;
-	if (flip_vertically) {
-		// Swap row j with its mirror row, byte by byte.
-		stbi_uc t;
-		for (j = 0; j < ((int)s->img_y >> 1); ++j) {
-			stbi_uc* p1 = out + j * s->img_x * target;
-			stbi_uc* p2 = out + (s->img_y - 1 - j) * s->img_x * target;
-			for (i = 0; i < (int)s->img_x * target; ++i) {
-				t = p1[i];
-				p1[i] = p2[i];
-				p2[i] = t;
-			}
-		}
-	}
-	if (req_comp && req_comp != target) {
-		out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
-		if (out == NULL)
-			return out; // stbi__convert_format frees input on failure
-	}
-	*x = s->img_x;
-	*y = s->img_y;
-	if (comp)
-		*comp = s->img_n;
-	return out;
-}

-#endif

-// Targa Truevision - TGA

-// by Jonathan Dummer

-#ifndef STBI_NO_TGA

-// Map a TGA pixel size to a component count; returns 0 for unsupported sizes.
-// Only grey, RGB and RGBA (including 15/16-bit RGB) are accepted. When
-// is_rgb16 is non-NULL it is set to 1 for the 15/16-bit RGB case and to 0
-// otherwise.
-static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16) {
-	if (is_rgb16 != NULL)
-		*is_rgb16 = 0;
-	if (bits_per_pixel == 8)
-		return STBI_grey;
-	// 16 bits means grey + alpha for grey images, packed RGB otherwise.
-	if (bits_per_pixel == 16 && is_grey)
-		return STBI_grey_alpha;
-	if (bits_per_pixel == 15 || bits_per_pixel == 16) {
-		if (is_rgb16 != NULL)
-			*is_rgb16 = 1;
-		return STBI_rgb;
-	}
-	if (bits_per_pixel == 24 || bits_per_pixel == 32)
-		return bits_per_pixel / 8;
-	return 0;
-}

-// Read just enough of a TGA header to report width, height and component
-// count. Each of x, y, comp may be NULL. Returns 1 on success; on any
-// rejection the stream is rewound and 0 is returned.
-static int stbi__tga_info(stbi__context* s, int* x, int* y, int* comp) {
-	int width, height, ncomp, img_type, pixel_bits, cmap_bits, cmap_type;
-	stbi__get8(s); // discard Offset
-	cmap_type = stbi__get8(s);
-	if (cmap_type > 1)
-		goto reject; // only RGB or indexed allowed
-	img_type = stbi__get8(s);
-	if (cmap_type == 1) {
-		// colormapped (paletted) image
-		if (!(img_type == 1 || img_type == 9))
-			goto reject;
-		stbi__skip(s, 4); // skip index of first colormap entry and number of entries
-		cmap_bits = stbi__get8(s); // bits per palette color entry
-		switch (cmap_bits) {
-		case 8:
-		case 15:
-		case 16:
-		case 24:
-		case 32:
-			break;
-		default:
-			goto reject;
-		}
-		stbi__skip(s, 4); // skip image x and y origin
-	} else {
-		// "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
-		if (!(img_type == 2 || img_type == 3 || img_type == 10 || img_type == 11))
-			goto reject;
-		stbi__skip(s, 9); // skip colormap specification and image x/y origin
-		cmap_bits = 0;
-	}
-	width = stbi__get16le(s);
-	if (width < 1)
-		goto reject;
-	height = stbi__get16le(s);
-	if (height < 1)
-		goto reject;
-	pixel_bits = stbi__get8(s);
-	stbi__get8(s); // ignore alpha bits
-	if (cmap_bits != 0) {
-		// With a colormap, pixel_bits is the size of an index; only 8- and
-		// 16-bit indexes are accepted.
-		if (!(pixel_bits == 8 || pixel_bits == 16))
-			goto reject;
-		ncomp = stbi__tga_get_comp(cmap_bits, 0, NULL);
-	} else {
-		ncomp = stbi__tga_get_comp(pixel_bits, (img_type == 3) || (img_type == 11), NULL);
-	}
-	if (ncomp == 0)
-		goto reject;
-	if (x != NULL)
-		*x = width;
-	if (y != NULL)
-		*y = height;
-	if (comp != NULL)
-		*comp = ncomp;
-	return 1; // seems to have passed everything
-reject:
-	stbi__rewind(s);
-	return 0;
-}

-// Heuristically decide whether the stream looks like a supported TGA file.
-// The stream is always rewound before returning.
-static int stbi__tga_test(stbi__context* s) {
-	int looks_ok = 0;
-	int field, cmap_type;
-	do {
-		stbi__get8(s); // discard Offset
-		cmap_type = stbi__get8(s);
-		if (cmap_type > 1)
-			break; // only RGB or indexed allowed
-		field = stbi__get8(s); // image type
-		if (cmap_type == 1) {
-			// colormapped (paletted) image: demands image type 1 or 9
-			if (!(field == 1 || field == 9))
-				break;
-			stbi__skip(s, 4); // skip index of first colormap entry and number of entries
-			field = stbi__get8(s); // bits per palette color entry
-			if (!(field == 8 || field == 15 || field == 16 || field == 24 || field == 32))
-				break;
-			stbi__skip(s, 4); // skip image x and y origin
-		} else {
-			// "normal" image w/o colormap: only RGB or grey allowed, +/- RLE
-			if (!(field == 2 || field == 3 || field == 10 || field == 11))
-				break;
-			stbi__skip(s, 9); // skip colormap specification and image x/y origin
-		}
-		if (stbi__get16le(s) < 1)
-			break; // width
-		if (stbi__get16le(s) < 1)
-			break; // height
-		field = stbi__get8(s); // bits per pixel
-		// for colormapped images, bpp is the size of an index
-		if (cmap_type == 1 && field != 8 && field != 16)
-			break;
-		if (!(field == 8 || field == 15 || field == 16 || field == 24 || field == 32))
-			break;
-		looks_ok = 1; // every check passed
-	} while (0);
-	stbi__rewind(s);
-	return looks_ok;
-}

-// Read one little-endian 16-bit pixel and expand its three 5-bit channels to
-// 8 bits each, writing them to out[0..2] in R, G, B order (so no swap is
-// needed later). The top bit is ignored: 15- and 16-bit TGAs are both treated
-// as RGB without alpha.
-static void stbi__tga_read_rgb16(stbi__context* s, stbi_uc* out) {
-	const stbi__uint16 mask5 = 31;
-	stbi__uint16 word = (stbi__uint16)stbi__get16le(s);
-	int shift = 10;
-	int c;
-	// Channels sit at bit offsets 10, 5 and 0.
-	for (c = 0; c < 3; ++c) {
-		int level = (word >> shift) & mask5;
-		out[c] = (stbi_uc)((level * 255) / 31);
-		shift -= 5;
-	}
-}

-// Decode a TGA image from `s`.
-//   x, y     - receive the image width and height
-//   comp     - when non-NULL, receives the component count found in the file
-//   req_comp - 0 to keep the file's component count, otherwise the requested one
-//   ri       - unused
-// Returns the pixel buffer, or NULL on failure. An indexed image that declares
-// an empty palette is rejected, because every pixel lookup would otherwise
-// read past the end of a zero-length palette allocation.
-static void* stbi__tga_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri) {
-	//   read in the TGA header stuff
-	int tga_offset = stbi__get8(s);
-	int tga_indexed = stbi__get8(s);
-	int tga_image_type = stbi__get8(s);
-	int tga_is_RLE = 0;
-	int tga_palette_start = stbi__get16le(s);
-	int tga_palette_len = stbi__get16le(s);
-	int tga_palette_bits = stbi__get8(s);
-	int tga_x_origin = stbi__get16le(s);
-	int tga_y_origin = stbi__get16le(s);
-	int tga_width = stbi__get16le(s);
-	int tga_height = stbi__get16le(s);
-	int tga_bits_per_pixel = stbi__get8(s);
-	int tga_comp, tga_rgb16 = 0;
-	int tga_inverted = stbi__get8(s);
-	// int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused
-	// (useless?)
-	//   image data
-	unsigned char* tga_data;
-	unsigned char* tga_palette = NULL;
-	int i, j;
-	unsigned char raw_data[4] = {0};
-	int RLE_count = 0;
-	int RLE_repeating = 0;
-	int read_next_pixel = 1;
-	STBI_NOTUSED(ri);
-	STBI_NOTUSED(tga_x_origin); // @TODO
-	STBI_NOTUSED(tga_y_origin); // @TODO
-	//   do a tiny bit of processing
-	// Image types 8 and above are the RLE variants of (type - 8).
-	if (tga_image_type >= 8) {
-		tga_image_type -= 8;
-		tga_is_RLE = 1;
-	}
-	// Bit 5 of the descriptor byte clear means the rows must be flipped.
-	tga_inverted = 1 - ((tga_inverted >> 5) & 1);
-	//   If I'm paletted, then I'll use the number of bits from the palette
-	if (tga_indexed)
-		tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
-	else
-		tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);
-	if (!tga_comp) // shouldn't really happen, stbi__tga_test() should have
-				   // ensured basic consistency
-		return stbi__errpuc("bad format", "Can't find out TGA pixelformat");
-	//   tga info
-	*x = tga_width;
-	*y = tga_height;
-	if (comp)
-		*comp = tga_comp;
-	if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
-		return stbi__errpuc("too large", "Corrupt TGA");
-	tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
-	if (!tga_data)
-		return stbi__errpuc("outofmem", "Out of memory");
-	// skip to the data's starting position (offset usually = 0)
-	stbi__skip(s, tga_offset);
-	if (!tga_indexed && !tga_is_RLE && !tga_rgb16) {
-		// Fast path: uncompressed, non-paletted rows are read straight into
-		// their final (possibly flipped) position.
-		for (i = 0; i < tga_height; ++i) {
-			int row = tga_inverted ? tga_height - i - 1 : i;
-			stbi_uc* tga_row = tga_data + row * tga_width * tga_comp;
-			stbi__getn(s, tga_row, tga_width * tga_comp);
-		}
-	} else {
-		//   do I need to load a palette?
-		if (tga_indexed) {
-			// The lookup below falls back to entry 0 for out-of-range indexes,
-			// so the palette must hold at least one entry.
-			if (tga_palette_len == 0) {
-				STBI_FREE(tga_data);
-				return stbi__errpuc("bad palette", "Corrupt TGA");
-			}
-			//   any data to skip? (offset usually = 0)
-			stbi__skip(s, tga_palette_start);
-			//   load the palette
-			tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
-			if (!tga_palette) {
-				STBI_FREE(tga_data);
-				return stbi__errpuc("outofmem", "Out of memory");
-			}
-			if (tga_rgb16) {
-				stbi_uc* pal_entry = tga_palette;
-				STBI_ASSERT(tga_comp == STBI_rgb);
-				for (i = 0; i < tga_palette_len; ++i) {
-					stbi__tga_read_rgb16(s, pal_entry);
-					pal_entry += tga_comp;
-				}
-			} else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
-				STBI_FREE(tga_data);
-				STBI_FREE(tga_palette);
-				return stbi__errpuc("bad palette", "Corrupt TGA");
-			}
-		}
-		//   load the data
-		for (i = 0; i < tga_width * tga_height; ++i) {
-			//   if I'm in RLE mode, do I need to get a RLE packet header?
-			if (tga_is_RLE) {
-				if (RLE_count == 0) {
-					//   yep, get the next byte as a RLE command
-					int RLE_cmd = stbi__get8(s);
-					// Low 7 bits: run length minus one; top bit: repeat flag.
-					RLE_count = 1 + (RLE_cmd & 127);
-					RLE_repeating = RLE_cmd >> 7;
-					read_next_pixel = 1;
-				} else if (!RLE_repeating) {
-					read_next_pixel = 1;
-				}
-			} else {
-				read_next_pixel = 1;
-			}
-			//   OK, if I need to read a pixel, do it now
-			if (read_next_pixel) {
-				//   load however much data we did have
-				if (tga_indexed) {
-					// read in index, then perform the lookup
-					int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
-					if (pal_idx >= tga_palette_len) {
-						// invalid index
-						pal_idx = 0;
-					}
-					pal_idx *= tga_comp;
-					for (j = 0; j < tga_comp; ++j) {
-						raw_data[j] = tga_palette[pal_idx + j];
-					}
-				} else if (tga_rgb16) {
-					STBI_ASSERT(tga_comp == STBI_rgb);
-					stbi__tga_read_rgb16(s, raw_data);
-				} else {
-					//   read in the data raw
-					for (j = 0; j < tga_comp; ++j) {
-						raw_data[j] = stbi__get8(s);
-					}
-				}
-				//   clear the reading flag for the next pixel
-				read_next_pixel = 0;
-			} // end of reading a pixel
-			// copy data
-			for (j = 0; j < tga_comp; ++j)
-				tga_data[i * tga_comp + j] = raw_data[j];
-			//   in case we're in RLE mode, keep counting down
-			--RLE_count;
-		}
-		//   do I need to invert the image?
-		if (tga_inverted) {
-			// Swap row j with its mirror row, byte by byte.
-			for (j = 0; j * 2 < tga_height; ++j) {
-				int index1 = j * tga_width * tga_comp;
-				int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
-				for (i = tga_width * tga_comp; i > 0; --i) {
-					unsigned char temp = tga_data[index1];
-					tga_data[index1] = tga_data[index2];
-					tga_data[index2] = temp;
-					++index1;
-					++index2;
-				}
-			}
-		}
-		//   clear my palette, if I had one
-		if (tga_palette != NULL) {
-			STBI_FREE(tga_palette);
-		}
-	}
-	// swap RGB - if the source data was RGB16, it already is in the right order
-	if (tga_comp >= 3 && !tga_rgb16) {
-		unsigned char* tga_pixel = tga_data;
-		for (i = 0; i < tga_width * tga_height; ++i) {
-			unsigned char temp = tga_pixel[0];
-			tga_pixel[0] = tga_pixel[2];
-			tga_pixel[2] = temp;
-			tga_pixel += tga_comp;
-		}
-	}
-	// convert to target component count
-	if (req_comp && req_comp != tga_comp)
-		tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);
-	//   the things I do to get rid of an error message, and yet keep
-	//   Microsoft's C compilers happy... [8^(
-	tga_palette_start = tga_palette_len = tga_palette_bits = tga_x_origin = tga_y_origin = 0;
-	STBI_NOTUSED(tga_palette_start);
-	//   OK, done
-	return tga_data;
-}

-#endif

-// *************************************************************************************************

-// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz,

-// tweaked by STB

-#ifndef STBI_NO_PSD

-// Report whether the stream starts with the 32-bit PSD signature 0x38425053,
-// then rewind the stream.
-static int stbi__psd_test(stbi__context* s) {
-	int is_psd;
-	is_psd = (stbi__get32be(s) == 0x38425053) ? 1 : 0;
-	stbi__rewind(s);
-	return is_psd;
-}

-// Decode one run-length-encoded channel of pixelCount samples from `s`,
-// writing each sample to `p` with a stride of 4 bytes.
-// Returns 1 on success, 0 when a run would overshoot pixelCount.
-static int stbi__psd_decode_rle(stbi__context* s, stbi_uc* p, int pixelCount) {
-	int done = 0;
-	while (done < pixelCount) {
-		int remaining = pixelCount - done;
-		int code = stbi__get8(s);
-		int run, k;
-		if (code == 128)
-			continue; // No-op.
-		if (code < 128) {
-			// Literal run: copy the next code+1 bytes.
-			run = code + 1;
-			if (run > remaining)
-				return 0; // corrupt data
-			for (k = 0; k < run; ++k) {
-				*p = stbi__get8(s);
-				p += 4;
-			}
-		} else {
-			// Replicated run: the next source byte is repeated 257-code times
-			// (code read as a negative 8-bit value, count = -code + 1).
-			stbi_uc fill;
-			run = 257 - code;
-			if (run > remaining)
-				return 0; // corrupt data
-			fill = stbi__get8(s);
-			for (k = 0; k < run; ++k) {
-				*p = fill;
-				p += 4;
-			}
-		}
-		done += run;
-	}
-	return 1;
-}

-static void* stbi__psd_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri, int bpc) {

-	int pixelCount;

-	int channelCount, compression;

-	int channel, i;

-	int bitdepth;

-	int w, h;

-	stbi_uc* out;

-	STBI_NOTUSED(ri);

-	// Check identifier

-	if (stbi__get32be(s) != 0x38425053) // "8BPS"

-		return stbi__errpuc("not PSD", "Corrupt PSD image");

-	// Check file type version.

-	if (stbi__get16be(s) != 1)

-		return stbi__errpuc("wrong version", "Unsupported version of PSD image");

-	// Skip 6 reserved bytes.

-	stbi__skip(s, 6);

-	// Read the number of channels (R, G, B, A, etc).

-	channelCount = stbi__get16be(s);

-	if (channelCount < 0 || channelCount > 16)

-		return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");

-	// Read the rows and columns of the image.

-	h = stbi__get32be(s);

-	w = stbi__get32be(s);

-	// Make sure the depth is 8 bits.

-	bitdepth = stbi__get16be(s);

-	if (bitdepth != 8 && bitdepth != 16)

-		return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");

-	// Make sure the color mode is RGB.

-	// Valid options are:

-	//   0: Bitmap

-	//   1: Grayscale

-	//   2: Indexed color

-	//   3: RGB color

-	//   4: CMYK color

-	//   7: Multichannel

-	//   8: Duotone

-	//   9: Lab color

-	if (stbi__get16be(s) != 3)

-		return stbi__errpuc("wrong color format", "PSD is not in RGB color format");

-	// Skip the Mode Data.  (It's the palette for indexed color; other info for

-	// other modes.)

-	stbi__skip(s, stbi__get32be(s));

-	// Skip the image resources.  (resolution, pen tool paths, etc)

-	stbi__skip(s, stbi__get32be(s));

-	// Skip the reserved data.

-	stbi__skip(s, stbi__get32be(s));

-	// Find out if the data is compressed.

-	// Known values:

-	//   0: no compression

-	//   1: RLE compressed

-	compression = stbi__get16be(s);

-	if (compression > 1)

-		return stbi__errpuc("bad compression", "PSD has an unknown compression format");

-	// Check size

-	if (!stbi__mad3sizes_valid(4, w, h, 0))

-		return stbi__errpuc("too large", "Corrupt PSD");

-	// Create the destination image.

-	if (!compression && bitdepth == 16 && bpc == 16) {

-		out = (stbi_uc*)stbi__malloc_mad3(8, w, h, 0);

-		ri->bits_per_channel = 16;

-	} else

-		out = (stbi_uc*)stbi__malloc(4 * w * h);

-	if (!out)

-		return stbi__errpuc("outofmem", "Out of memory");

-	pixelCount = w * h;

-	// Initialize the data to zero.

-	// memset( out, 0, pixelCount * 4 );

-	// Finally, the image data.

-	if (compression) {

-		// RLE as used by .PSD and .TIFF

-		// Loop until you get the number of unpacked bytes you are expecting:

-		//     Read the next source byte into n.

-		//     If n is between 0 and 127 inclusive, copy the next n+1 bytes

-		//     literally. Else if n is between -127 and -1 inclusive, copy the

-		//     next byte -n+1 times. Else if n is 128, noop.

-		// Endloop

-		// The RLE-compressed data is preceded by a 2-byte data count for each

-		// row in the data, which we're going to just skip.

-		stbi__skip(s, h * channelCount * 2);

-		// Read the RLE data by channel.

-		for (channel = 0; channel < 4; channel++) {

-			stbi_uc* p;

-			p = out + channel;

-			if (channel >= channelCount) {

-				// Fill this channel with default data.

-				for (i = 0; i < pixelCount; i++, p += 4)

-					*p = (channel == 3 ? 255 : 0);

-			} else {

-				// Read the RLE data.

-				if (!stbi__psd_decode_rle(s, p, pixelCount)) {

-					STBI_FREE(out);

-					return stbi__errpuc("corrupt", "bad RLE data");

-				}

-			}

-		}

-	} else {

-		// We're at the raw image data.  It's each channel in order (Red, Green,

-		// Blue, Alpha, ...) where each channel consists of an 8-bit (or 16-bit)

-		// value for each pixel in the image.

-		// Read the data by channel.

-		for (channel = 0; channel < 4; channel++) {

-			if (channel >= channelCount) {

-				// Fill this channel with default data.

-				if (bitdepth == 16 && bpc == 16) {

-					stbi__uint16* q = ((stbi__uint16*)out) + channel;

-					stbi__uint16 val = channel == 3 ? 65535 : 0;

-					for (i = 0; i < pixelCount; i++, q += 4)

-						*q = val;

-				} else {

-					stbi_uc* p = out + channel;

-					stbi_uc val = channel == 3 ? 255 : 0;

-					for (i = 0; i < pixelCount; i++, p += 4)

-						*p = val;

-				}

-			} else {

-				if (ri->bits_per_channel == 16) { // output bpc

-					stbi__uint16* q = ((stbi__uint16*)out) + channel;

-					for (i = 0; i < pixelCount; i++, q += 4)

-						*q = (stbi__uint16)stbi__get16be(s);

-				} else {

-					stbi_uc* p = out + channel;

-					if (bitdepth == 16) { // input bpc

-						for (i = 0; i < pixelCount; i++, p += 4)

-							*p = (stbi_uc)(stbi__get16be(s) >> 8);

-					} else {

-						for (i = 0; i < pixelCount; i++, p += 4)

-							*p = stbi__get8(s);

-					}

-				}

-			}

-		}

-	}

-	// remove weird white matte from PSD

-	if (channelCount >= 4) {

-		if (ri->bits_per_channel == 16) {

-			for (i = 0; i < w * h; ++i) {

-				stbi__uint16* pixel = (stbi__uint16*)out + 4 * i;

-				if (pixel[3] != 0 && pixel[3] != 65535) {

-					float a = pixel[3] / 65535.0f;

-					float ra = 1.0f / a;

-					float inv_a = 65535.0f * (1 - ra);

-					pixel[0] = (stbi__uint16)(pixel[0] * ra + inv_a);

-					pixel[1] = (stbi__uint16)(pixel[1] * ra + inv_a);

-					pixel[2] = (stbi__uint16)(pixel[2] * ra + inv_a);

-				}

-			}

-		} else {

-			for (i = 0; i < w * h; ++i) {

-				unsigned char* pixel = out + 4 * i;

-				if (pixel[3] != 0 && pixel[3] != 255) {

-					float a = pixel[3] / 255.0f;

-					float ra = 1.0f / a;

-					float inv_a = 255.0f * (1 - ra);

-					pixel[0] = (unsigned char)(pixel[0] * ra + inv_a);

-					pixel[1] = (unsigned char)(pixel[1] * ra + inv_a);

-					pixel[2] = (unsigned char)(pixel[2] * ra + inv_a);

-				}

-			}

-		}

-	}

-	// convert to desired output format

-	if (req_comp && req_comp != 4) {

-		if (ri->bits_per_channel == 16)

-			out = (stbi_uc*)stbi__convert_format16((stbi__uint16*)out, 4, req_comp, w, h);

-		else

-			out = stbi__convert_format(out, 4, req_comp, w, h);

-		if (out == NULL)

-			return out; // stbi__convert_format frees input on failure

-	}

-	if (comp)

-		*comp = 4;

-	*y = h;

-	*x = w;

-	return out;

-}

-#endif

-// *************************************************************************************************

-// Softimage PIC loader

-// by Tom Seddon

-//

-// See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format

-// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/

-#ifndef STBI_NO_PIC

-static int stbi__pic_is4(stbi__context* s, const char* str) {

-	int i;

-	for (i = 0; i < 4; ++i)

-		if (stbi__get8(s) != (stbi_uc)str[i])

-			return 0;

-	return 1;

-}

-static int stbi__pic_test_core(stbi__context* s) {

-	int i;

-	if (!stbi__pic_is4(s, "\x53\x80\xF6\x34"))

-		return 0;

-	for (i = 0; i < 84; ++i)

-		stbi__get8(s);

-	if (!stbi__pic_is4(s, "PICT"))

-		return 0;

-	return 1;

-}

-typedef struct {

-	stbi_uc size, type, channel;

-} stbi__pic_packet;

-static stbi_uc* stbi__readval(stbi__context* s, int channel, stbi_uc* dest) {

-	int mask = 0x80, i;

-	for (i = 0; i < 4; ++i, mask >>= 1) {

-		if (channel & mask) {

-			if (stbi__at_eof(s))

-				return stbi__errpuc("bad file", "PIC file too short");

-			dest[i] = stbi__get8(s);

-		}

-	}

-	return dest;

-}

-static void stbi__copyval(int channel, stbi_uc* dest, const stbi_uc* src) {

-	int mask = 0x80, i;

-	for (i = 0; i < 4; ++i, mask >>= 1)

-		if (channel & mask)

-			dest[i] = src[i];

-}

-static stbi_uc* stbi__pic_load_core(stbi__context* s, int width, int height, int* comp, stbi_uc* result) {

-	int act_comp = 0, num_packets = 0, y, chained;

-	stbi__pic_packet packets[10];

-	// this will (should...) cater for even some bizarre stuff like having data

-	// for the same channel in multiple packets.

-	do {

-		stbi__pic_packet* packet;

-		if (num_packets == sizeof(packets) / sizeof(packets[0]))

-			return stbi__errpuc("bad format", "too many packets");

-		packet = &packets[num_packets++];

-		chained = stbi__get8(s);

-		packet->size = stbi__get8(s);

-		packet->type = stbi__get8(s);

-		packet->channel = stbi__get8(s);

-		act_comp |= packet->channel;

-		if (stbi__at_eof(s))

-			return stbi__errpuc("bad file", "file too short (reading packets)");

-		if (packet->size != 8)

-			return stbi__errpuc("bad format", "packet isn't 8bpp");

-	} while (chained);

-	*comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?

-	for (y = 0; y < height; ++y) {

-		int packet_idx;

-		for (packet_idx = 0; packet_idx < num_packets; ++packet_idx) {

-			stbi__pic_packet* packet = &packets[packet_idx];

-			stbi_uc* dest = result + y * width * 4;

-			switch (packet->type) {

-			default:

-				return stbi__errpuc("bad format", "packet has bad compression type");

-			case 0: { // uncompressed

-				int x;

-				for (x = 0; x < width; ++x, dest += 4)

-					if (!stbi__readval(s, packet->channel, dest))

-						return 0;

-				break;

-			}

-			case 1: // Pure RLE

-			{

-				int left = width, i;

-				while (left > 0) {

-					stbi_uc count, value[4];

-					count = stbi__get8(s);

-					if (stbi__at_eof(s))

-						return stbi__errpuc("bad file", "file too short (pure read count)");

-					if (count > left)

-						count = (stbi_uc)left;

-					if (!stbi__readval(s, packet->channel, value))

-						return 0;

-					for (i = 0; i < count; ++i, dest += 4)

-						stbi__copyval(packet->channel, dest, value);

-					left -= count;

-				}

-			} break;

-			case 2: { // Mixed RLE

-				int left = width;

-				while (left > 0) {

-					int count = stbi__get8(s), i;

-					if (stbi__at_eof(s))

-						return stbi__errpuc("bad file", "file too short (mixed read count)");

-					if (count >= 128) { // Repeated

-						stbi_uc value[4];

-						if (count == 128)

-							count = stbi__get16be(s);

-						else

-							count -= 127;

-						if (count > left)

-							return stbi__errpuc("bad file", "scanline overrun");

-						if (!stbi__readval(s, packet->channel, value))

-							return 0;

-						for (i = 0; i < count; ++i, dest += 4)

-							stbi__copyval(packet->channel, dest, value);

-					} else { // Raw

-						++count;

-						if (count > left)

-							return stbi__errpuc("bad file", "scanline overrun");

-						for (i = 0; i < count; ++i, dest += 4)

-							if (!stbi__readval(s, packet->channel, dest))

-								return 0;

-					}

-					left -= count;

-				}

-				break;

-			}

-			}

-		}

-	}

-	return result;

-}

-static void* stbi__pic_load(stbi__context* s, int* px, int* py, int* comp, int req_comp, stbi__result_info* ri) {

-	stbi_uc* result;

-	int i, x, y, internal_comp;

-	STBI_NOTUSED(ri);

-	if (!comp)

-		comp = &internal_comp;

-	for (i = 0; i < 92; ++i)

-		stbi__get8(s);

-	x = stbi__get16be(s);

-	y = stbi__get16be(s);

-	if (stbi__at_eof(s))

-		return stbi__errpuc("bad file", "file too short (pic header)");

-	if (!stbi__mad3sizes_valid(x, y, 4, 0))

-		return stbi__errpuc("too large", "PIC image too large to decode");

-	stbi__get32be(s); // skip `ratio'

-	stbi__get16be(s); // skip `fields'

-	stbi__get16be(s); // skip `pad'

-	// intermediate buffer is RGBA

-	result = (stbi_uc*)stbi__malloc_mad3(x, y, 4, 0);

-	memset(result, 0xff, x * y * 4);

-	if (!stbi__pic_load_core(s, x, y, comp, result)) {

-		STBI_FREE(result);

-		result = 0;

-	}

-	*px = x;

-	*py = y;

-	if (req_comp == 0)

-		req_comp = *comp;

-	result = stbi__convert_format(result, 4, req_comp, x, y);

-	return result;

-}

-static int stbi__pic_test(stbi__context* s) {

-	int r = stbi__pic_test_core(s);

-	stbi__rewind(s);

-	return r;

-}

-#endif

-// *************************************************************************************************

-// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb

-#ifndef STBI_NO_GIF

-typedef struct {

-	stbi__int16 prefix;

-	stbi_uc first;

-	stbi_uc suffix;

-} stbi__gif_lzw;

-typedef struct {

-	int w, h;

-	stbi_uc* out;		 // output buffer (always 4 components)

-	stbi_uc* background; // The current "background" as far as a gif is concerned

-	stbi_uc* history;

-	int flags, bgindex, ratio, transparent, eflags;

-	stbi_uc pal[256][4];

-	stbi_uc lpal[256][4];

-	stbi__gif_lzw codes[8192];

-	stbi_uc* color_table;

-	int parse, step;

-	int lflags;

-	int start_x, start_y;

-	int max_x, max_y;

-	int cur_x, cur_y;

-	int line_size;

-	int delay;

-} stbi__gif;

-static int stbi__gif_test_raw(stbi__context* s) {

-	int sz;

-	if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')

-		return 0;

-	sz = stbi__get8(s);

-	if (sz != '9' && sz != '7')

-		return 0;

-	if (stbi__get8(s) != 'a')

-		return 0;

-	return 1;

-}

-static int stbi__gif_test(stbi__context* s) {

-	int r = stbi__gif_test_raw(s);

-	stbi__rewind(s);

-	return r;

-}

-static void stbi__gif_parse_colortable(stbi__context* s, stbi_uc pal[256][4], int num_entries, int transp) {

-	int i;

-	for (i = 0; i < num_entries; ++i) {

-		pal[i][2] = stbi__get8(s);

-		pal[i][1] = stbi__get8(s);

-		pal[i][0] = stbi__get8(s);

-		pal[i][3] = transp == i ? 0 : 255;

-	}

-}

-static int stbi__gif_header(stbi__context* s, stbi__gif* g, int* comp, int is_info) {

-	stbi_uc version;

-	if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')

-		return stbi__err("not GIF", "Corrupt GIF");

-	version = stbi__get8(s);

-	if (version != '7' && version != '9')

-		return stbi__err("not GIF", "Corrupt GIF");

-	if (stbi__get8(s) != 'a')

-		return stbi__err("not GIF", "Corrupt GIF");

-	stbi__g_failure_reason = "";

-	g->w = stbi__get16le(s);

-	g->h = stbi__get16le(s);

-	g->flags = stbi__get8(s);

-	g->bgindex = stbi__get8(s);

-	g->ratio = stbi__get8(s);

-	g->transparent = -1;

-	if (comp != 0)

-		*comp = 4; // can't actually tell whether it's 3 or 4 until we parse the

-				   // comments

-	if (is_info)

-		return 1;

-	if (g->flags & 0x80)

-		stbi__gif_parse_colortable(s, g->pal, 2 << (g->flags & 7), -1);

-	return 1;

-}

-static int stbi__gif_info_raw(stbi__context* s, int* x, int* y, int* comp) {

-	stbi__gif* g = (stbi__gif*)stbi__malloc(sizeof(stbi__gif));

-	if (!stbi__gif_header(s, g, comp, 1)) {

-		STBI_FREE(g);

-		stbi__rewind(s);

-		return 0;

-	}

-	if (x)

-		*x = g->w;

-	if (y)

-		*y = g->h;

-	STBI_FREE(g);

-	return 1;

-}

-static void stbi__out_gif_code(stbi__gif* g, stbi__uint16 code) {

-	stbi_uc *p, *c;

-	int idx;

-	// recurse to decode the prefixes, since the linked-list is backwards,

-	// and working backwards through an interleaved image would be nasty

-	if (g->codes[code].prefix >= 0)

-		stbi__out_gif_code(g, g->codes[code].prefix);

-	if (g->cur_y >= g->max_y)

-		return;

-	idx = g->cur_x + g->cur_y;

-	p = &g->out[idx];

-	g->history[idx / 4] = 1;

-	c = &g->color_table[g->codes[code].suffix * 4];

-	if (c[3] > 128) { // don't render transparent pixels;

-		p[0] = c[2];

-		p[1] = c[1];

-		p[2] = c[0];

-		p[3] = c[3];

-	}

-	g->cur_x += 4;

-	if (g->cur_x >= g->max_x) {

-		g->cur_x = g->start_x;

-		g->cur_y += g->step;

-		while (g->cur_y >= g->max_y && g->parse > 0) {

-			g->step = (1 << g->parse) * g->line_size;

-			g->cur_y = g->start_y + (g->step >> 1);

-			--g->parse;

-		}

-	}

-}

-static stbi_uc* stbi__process_gif_raster(stbi__context* s, stbi__gif* g) {

-	stbi_uc lzw_cs;

-	stbi__int32 len, init_code;

-	stbi__uint32 first;

-	stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;

-	stbi__gif_lzw* p;

-	lzw_cs = stbi__get8(s);

-	if (lzw_cs > 12)

-		return NULL;

-	clear = 1 << lzw_cs;

-	first = 1;

-	codesize = lzw_cs + 1;

-	codemask = (1 << codesize) - 1;

-	bits = 0;

-	valid_bits = 0;

-	for (init_code = 0; init_code < clear; init_code++) {

-		g->codes[init_code].prefix = -1;

-		g->codes[init_code].first = (stbi_uc)init_code;

-		g->codes[init_code].suffix = (stbi_uc)init_code;

-	}

-	// support no starting clear code

-	avail = clear + 2;

-	oldcode = -1;

-	len = 0;

-	for (;;) {

-		if (valid_bits < codesize) {

-			if (len == 0) {

-				len = stbi__get8(s); // start new block

-				if (len == 0)

-					return g->out;

-			}

-			--len;

-			bits |= (stbi__int32)stbi__get8(s) << valid_bits;

-			valid_bits += 8;

-		} else {

-			stbi__int32 code = bits & codemask;

-			bits >>= codesize;

-			valid_bits -= codesize;

-			// @OPTIMIZE: is there some way we can accelerate the non-clear

-			// path?

-			if (code == clear) { // clear code

-				codesize = lzw_cs + 1;

-				codemask = (1 << codesize) - 1;

-				avail = clear + 2;

-				oldcode = -1;

-				first = 0;

-			} else if (code == clear + 1) { // end of stream code

-				stbi__skip(s, len);

-				while ((len = stbi__get8(s)) > 0)

-					stbi__skip(s, len);

-				return g->out;

-			} else if (code <= avail) {

-				if (first) {

-					return stbi__errpuc("no clear code", "Corrupt GIF");

-				}

-				if (oldcode >= 0) {

-					p = &g->codes[avail++];

-					if (avail > 8192) {

-						return stbi__errpuc("too many codes", "Corrupt GIF");

-					}

-					p->prefix = (stbi__int16)oldcode;

-					p->first = g->codes[oldcode].first;

-					p->suffix = (code == avail) ? p->first : g->codes[code].first;

-				} else if (code == avail)

-					return stbi__errpuc("illegal code in raster", "Corrupt GIF");

-				stbi__out_gif_code(g, (stbi__uint16)code);

-				if ((avail & codemask) == 0 && avail <= 0x0FFF) {

-					codesize++;

-					codemask = (1 << codesize) - 1;

-				}

-				oldcode = code;

-			} else {

-				return stbi__errpuc("illegal code in raster", "Corrupt GIF");

-			}

-		}

-	}

-}

-// this function is designed to support animated gifs, although stb_image

-// doesn't support it two back is the image from two frames ago, used for a very

-// specific disposal format

-static stbi_uc* stbi__gif_load_next(stbi__context* s, stbi__gif* g, int* comp, int req_comp, stbi_uc* two_back) {

-	int dispose;

-	int first_frame;

-	int pi;

-	int pcount;

-	STBI_NOTUSED(req_comp);

-	// on first frame, any non-written pixels get the background colour

-	// (non-transparent)

-	first_frame = 0;

-	if (g->out == 0) {

-		if (!stbi__gif_header(s, g, comp, 0))

-			return 0; // stbi__g_failure_reason set by stbi__gif_header

-		if (!stbi__mad3sizes_valid(4, g->w, g->h, 0))

-			return stbi__errpuc("too large", "GIF image is too large");

-		pcount = g->w * g->h;

-		g->out = (stbi_uc*)stbi__malloc(4 * pcount);

-		g->background = (stbi_uc*)stbi__malloc(4 * pcount);

-		g->history = (stbi_uc*)stbi__malloc(pcount);

-		if (!g->out || !g->background || !g->history)

-			return stbi__errpuc("outofmem", "Out of memory");

-		// image is treated as "transparent" at the start - ie, nothing

-		// overwrites the current background; background colour is only used for

-		// pixels that are not rendered first frame, after that "background"

-		// color refers to the color that was there the previous frame.

-		memset(g->out, 0x00, 4 * pcount);

-		memset(g->background, 0x00,

-			   4 * pcount); // state of the background (starts transparent)

-		memset(g->history, 0x00,

-			   pcount); // pixels that were affected previous frame

-		first_frame = 1;

-	} else {

-		// second frame - how do we dispoase of the previous one?

-		dispose = (g->eflags & 0x1C) >> 2;

-		pcount = g->w * g->h;

-		if ((dispose == 3) && (two_back == 0)) {

-			dispose = 2; // if I don't have an image to revert back to, default

-						 // to the old background

-		}

-		if (dispose == 3) { // use previous graphic

-			for (pi = 0; pi < pcount; ++pi) {

-				if (g->history[pi]) {

-					memcpy(&g->out[pi * 4], &two_back[pi * 4], 4);

-				}

-			}

-		} else if (dispose == 2) {

-			// restore what was changed last frame to background before that

-			// frame;

-			for (pi = 0; pi < pcount; ++pi) {

-				if (g->history[pi]) {

-					memcpy(&g->out[pi * 4], &g->background[pi * 4], 4);

-				}

-			}

-		} else {

-			// This is a non-disposal case eithe way, so just

-			// leave the pixels as is, and they will become the new background

-			// 1: do not dispose

-			// 0:  not specified.

-		}

-		// background is what out is after the undoing of the previou frame;

-		memcpy(g->background, g->out, 4 * g->w * g->h);

-	}

-	// clear my history;

-	memset(g->history, 0x00,

-		   g->w * g->h); // pixels that were affected previous frame

-	for (;;) {

-		int tag = stbi__get8(s);

-		switch (tag) {

-		case 0x2C: /* Image Descriptor */

-		{

-			stbi__int32 x, y, w, h;

-			stbi_uc* o;

-			x = stbi__get16le(s);

-			y = stbi__get16le(s);

-			w = stbi__get16le(s);

-			h = stbi__get16le(s);

-			if (((x + w) > (g->w)) || ((y + h) > (g->h)))

-				return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");

-			g->line_size = g->w * 4;

-			g->start_x = x * 4;

-			g->start_y = y * g->line_size;

-			g->max_x = g->start_x + w * 4;

-			g->max_y = g->start_y + h * g->line_size;

-			g->cur_x = g->start_x;

-			g->cur_y = g->start_y;

-			// if the width of the specified rectangle is 0, that means

-			// we may not see *any* pixels or the image is malformed;

-			// to make sure this is caught, move the current y down to

-			// max_y (which is what out_gif_code checks).

-			if (w == 0)

-				g->cur_y = g->max_y;

-			g->lflags = stbi__get8(s);

-			if (g->lflags & 0x40) {

-				g->step = 8 * g->line_size; // first interlaced spacing

-				g->parse = 3;

-			} else {

-				g->step = g->line_size;

-				g->parse = 0;

-			}

-			if (g->lflags & 0x80) {

-				stbi__gif_parse_colortable(s, g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);

-				g->color_table = (stbi_uc*)g->lpal;

-			} else if (g->flags & 0x80) {

-				g->color_table = (stbi_uc*)g->pal;

-			} else

-				return stbi__errpuc("missing color table", "Corrupt GIF");

-			o = stbi__process_gif_raster(s, g);

-			if (!o)

-				return NULL;

-			// if this was the first frame,

-			pcount = g->w * g->h;

-			if (first_frame && (g->bgindex > 0)) {

-				// if first frame, any pixel not drawn to gets the background

-				// color

-				for (pi = 0; pi < pcount; ++pi) {

-					if (g->history[pi] == 0) {

-						g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo

-													 // that; It will be reset next frame if need

-													 // be;

-						memcpy(&g->out[pi * 4], &g->pal[g->bgindex], 4);

-					}

-				}

-			}

-			return o;

-		}

-		case 0x21: // Comment Extension.

-		{

-			int len;

-			int ext = stbi__get8(s);

-			if (ext == 0xF9) { // Graphic Control Extension.

-				len = stbi__get8(s);

-				if (len == 4) {

-					g->eflags = stbi__get8(s);

-					g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second,

-													  // saving as 1/1000ths.

-					// unset old transparent

-					if (g->transparent >= 0) {

-						g->pal[g->transparent][3] = 255;

-					}

-					if (g->eflags & 0x01) {

-						g->transparent = stbi__get8(s);

-						if (g->transparent >= 0) {

-							g->pal[g->transparent][3] = 0;

-						}

-					} else {

-						// don't need transparent

-						stbi__skip(s, 1);

-						g->transparent = -1;

-					}

-				} else {

-					stbi__skip(s, len);

-					break;

-				}

-			}

-			while ((len = stbi__get8(s)) != 0) {

-				stbi__skip(s, len);

-			}

-			break;

-		}

-		case 0x3B:				// gif stream termination code

-			return (stbi_uc*)s; // using '1' causes warning on some compilers

-		default:

-			return stbi__errpuc("unknown code", "Corrupt GIF");

-		}

-	}

-}

-static void* stbi__load_gif_main(stbi__context* s, int** delays, int* x, int* y, int* z, int* comp, int req_comp) {

-	if (stbi__gif_test(s)) {

-		int layers = 0;

-		stbi_uc* u = 0;

-		stbi_uc* out = 0;

-		stbi_uc* two_back = 0;

-		stbi__gif g;

-		int stride;

-		memset(&g, 0, sizeof(g));

-		if (delays) {

-			*delays = 0;

-		}

-		do {

-			u = stbi__gif_load_next(s, &g, comp, req_comp, two_back);

-			if (u == (stbi_uc*)s)

-				u = 0; // end of animated gif marker

-			if (u) {

-				*x = g.w;

-				*y = g.h;

-				++layers;

-				stride = g.w * g.h * 4;

-				if (out) {

-					void* tmp = (stbi_uc*)STBI_REALLOC(out, layers * stride);

-					if (NULL == tmp) {

-						STBI_FREE(g.out);

-						STBI_FREE(g.history);

-						STBI_FREE(g.background);

-						return stbi__errpuc("outofmem", "Out of memory");

-					} else

-						out = (stbi_uc*)tmp;

-					if (delays) {

-						*delays = (int*)STBI_REALLOC(*delays, sizeof(int) * layers);

-					}

-				} else {

-					out = (stbi_uc*)stbi__malloc(layers * stride);

-					if (delays) {

-						*delays = (int*)stbi__malloc(layers * sizeof(int));

-					}

-				}

-				memcpy(out + ((layers - 1) * stride), u, stride);

-				if (layers >= 2) {

-					two_back = out - 2 * stride;

-				}

-				if (delays) {

-					(*delays)[layers - 1U] = g.delay;

-				}

-			}

-		} while (u != 0);

-		// free temp buffer;

-		STBI_FREE(g.out);

-		STBI_FREE(g.history);

-		STBI_FREE(g.background);

-		// do the final conversion after loading everything;

-		if (req_comp && req_comp != 4)

-			out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h);

-		*z = layers;

-		return out;

-	} else {

-		return stbi__errpuc("not GIF", "Image was not as a gif type.");

-	}

-}

-static void* stbi__gif_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri) {

-	stbi_uc* u = 0;

-	stbi__gif g;

-	memset(&g, 0, sizeof(g));

-	STBI_NOTUSED(ri);

-	u = stbi__gif_load_next(s, &g, comp, req_comp, 0);

-	if (u == (stbi_uc*)s)

-		u = 0; // end of animated gif marker

-	if (u) {

-		*x = g.w;

-		*y = g.h;

-		// moved conversion to after successful load so that the same

-		// can be done for multiple frames.

-		if (req_comp && req_comp != 4)

-			u = stbi__convert_format(u, 4, req_comp, g.w, g.h);

-	} else if (g.out) {

-		// if there was an error and we allocated an image buffer, free it!

-		STBI_FREE(g.out);

-	}

-	// free buffers needed for multiple frame loading;

-	STBI_FREE(g.history);

-	STBI_FREE(g.background);

-	return u;

-}

-static int stbi__gif_info(stbi__context* s, int* x, int* y, int* comp) { return stbi__gif_info_raw(s, x, y, comp); }

-#endif

-// *************************************************************************************************

-// Radiance RGBE HDR loader

-// originally by Nicolas Schulz

-#ifndef STBI_NO_HDR

-static int stbi__hdr_test_core(stbi__context* s, const char* signature) {

-	int i;

-	for (i = 0; signature[i]; ++i)

-		if (stbi__get8(s) != signature[i])

-			return 0;

-	stbi__rewind(s);

-	return 1;

-}

-static int stbi__hdr_test(stbi__context* s) {

-	int r = stbi__hdr_test_core(s, "#?RADIANCE\n");

-	stbi__rewind(s);

-	if (!r) {

-		r = stbi__hdr_test_core(s, "#?RGBE\n");

-		stbi__rewind(s);

-	}

-	return r;

-}

-#define STBI__HDR_BUFLEN 1024

-static char* stbi__hdr_gettoken(stbi__context* z, char* buffer) {

-	int len = 0;

-	char c = '\0';

-	c = (char)stbi__get8(z);

-	while (!stbi__at_eof(z) && c != '\n') {

-		buffer[len++] = c;

-		if (len == STBI__HDR_BUFLEN - 1) {

-			// flush to end of line

-			while (!stbi__at_eof(z) && stbi__get8(z) != '\n')

-				;

-			break;

-		}

-		c = (char)stbi__get8(z);

-	}

-	buffer[len] = 0;

-	return buffer;

-}

-static void stbi__hdr_convert(float* output, stbi_uc* input, int req_comp) {

-	if (input[3] != 0) {

-		float f1;

-		// Exponent

-		f1 = (float)ldexp(1.0f, input[3] - (int)(128 + 8));

-		if (req_comp <= 2)

-			output[0] = (input[0] + input[1] + input[2]) * f1 / 3;

-		else {

-			output[0] = input[0] * f1;

-			output[1] = input[1] * f1;

-			output[2] = input[2] * f1;

-		}

-		if (req_comp == 2)

-			output[1] = 1;

-		if (req_comp == 4)

-			output[3] = 1;

-	} else {

-		switch (req_comp) {

-		case 4:

-			output[3] = 1; /* fallthrough */

-		case 3:

-			output[0] = output[1] = output[2] = 0;

-			break;

-		case 2:

-			output[1] = 1; /* fallthrough */

-		case 1:

-			output[0] = 0;

-			break;

-		}

-	}

-}

-static float* stbi__hdr_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri) {

-	char buffer[STBI__HDR_BUFLEN];

-	char* token;

-	int valid = 0;

-	int width, height;

-	stbi_uc* scanline;

-	float* hdr_data;

-	int len;

-	unsigned char count, value;

-	int i, j, k, c1, c2, z;

-	const char* headerToken;

-	STBI_NOTUSED(ri);

-	// Check identifier

-	headerToken = stbi__hdr_gettoken(s, buffer);

-	if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)

-		return stbi__errpf("not HDR", "Corrupt HDR image");

-	// Parse header

-	for (;;) {

-		token = stbi__hdr_gettoken(s, buffer);

-		if (token[0] == 0)

-			break;

-		if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0)

-			valid = 1;

-	}

-	if (!valid)

-		return stbi__errpf("unsupported format", "Unsupported HDR format");

-	// Parse width and height

-	// can't use sscanf() if we're not using stdio!

-	token = stbi__hdr_gettoken(s, buffer);

-	if (strncmp(token, "-Y ", 3))

-		return stbi__errpf("unsupported data layout", "Unsupported HDR format");

-	token += 3;

-	height = (int)strtol(token, &token, 10);

-	while (*token == ' ')

-		++token;

-	if (strncmp(token, "+X ", 3))

-		return stbi__errpf("unsupported data layout", "Unsupported HDR format");

-	token += 3;

-	width = (int)strtol(token, NULL, 10);

-	*x = width;

-	*y = height;

-	if (comp)

-		*comp = 3;

-	if (req_comp == 0)

-		req_comp = 3;

-	if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))

-		return stbi__errpf("too large", "HDR image is too large");

-	// Read data

-	hdr_data = (float*)stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);

-	if (!hdr_data)

-		return stbi__errpf("outofmem", "Out of memory");

-	// Load image data

-	// image data is stored as some number of sca

-	if (width < 8 || width >= 32768) {

-		// Read flat data

-		for (j = 0; j < height; ++j) {

-			for (i = 0; i < width; ++i) {

-				stbi_uc rgbe[4];

-			main_decode_loop:

-				stbi__getn(s, rgbe, 4);

-				stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);

-			}

-		}

-	} else {

-		// Read RLE-encoded data

-		scanline = NULL;

-		for (j = 0; j < height; ++j) {

-			c1 = stbi__get8(s);

-			c2 = stbi__get8(s);

-			len = stbi__get8(s);

-			if (c1 != 2 || c2 != 2 || (len & 0x80)) {

-				// not run-length encoded, so we have to actually use THIS data

-				// as a decoded pixel (note this can't be a valid pixel--one of

-				// RGB must be

-				// >= 128)

-				stbi_uc rgbe[4];

-				rgbe[0] = (stbi_uc)c1;

-				rgbe[1] = (stbi_uc)c2;

-				rgbe[2] = (stbi_uc)len;

-				rgbe[3] = (stbi_uc)stbi__get8(s);

-				stbi__hdr_convert(hdr_data, rgbe, req_comp);

-				i = 1;

-				j = 0;

-				STBI_FREE(scanline);

-				goto main_decode_loop; // yes, this makes no sense

-			}

-			len <<= 8;

-			len |= stbi__get8(s);

-			if (len != width) {

-				STBI_FREE(hdr_data);

-				STBI_FREE(scanline);

-				return stbi__errpf("invalid decoded scanline length", "corrupt HDR");

-			}

-			if (scanline == NULL) {

-				scanline = (stbi_uc*)stbi__malloc_mad2(width, 4, 0);

-				if (!scanline) {

-					STBI_FREE(hdr_data);

-					return stbi__errpf("outofmem", "Out of memory");

-				}

-			}

-			for (k = 0; k < 4; ++k) {

-				int nleft;

-				i = 0;

-				while ((nleft = width - i) > 0) {

-					count = stbi__get8(s);

-					if (count > 128) {

-						// Run

-						value = stbi__get8(s);

-						count -= 128;

-						if (count > nleft) {

-							STBI_FREE(hdr_data);

-							STBI_FREE(scanline);

-							return stbi__errpf("corrupt", "bad RLE data in HDR");

-						}

-						for (z = 0; z < count; ++z)

-							scanline[i++ * 4 + k] = value;

-					} else {

-						// Dump

-						if (count > nleft) {

-							STBI_FREE(hdr_data);

-							STBI_FREE(scanline);

-							return stbi__errpf("corrupt", "bad RLE data in HDR");

-						}

-						for (z = 0; z < count; ++z)

-							scanline[i++ * 4 + k] = stbi__get8(s);

-					}

-				}

-			}

-			for (i = 0; i < width; ++i)

-				stbi__hdr_convert(hdr_data + (j * width + i) * req_comp, scanline + i * 4, req_comp);

-		}

-		if (scanline)

-			STBI_FREE(scanline);

-	}

-	return hdr_data;

-}

-static int stbi__hdr_info(stbi__context* s, int* x, int* y, int* comp) {

-	char buffer[STBI__HDR_BUFLEN];

-	char* token;

-	int valid = 0;

-	int dummy;

-	if (!x)

-		x = &dummy;

-	if (!y)

-		y = &dummy;

-	if (!comp)

-		comp = &dummy;

-	if (stbi__hdr_test(s) == 0) {

-		stbi__rewind(s);

-		return 0;

-	}

-	for (;;) {

-		token = stbi__hdr_gettoken(s, buffer);

-		if (token[0] == 0)

-			break;

-		if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0)

-			valid = 1;

-	}

-	if (!valid) {

-		stbi__rewind(s);

-		return 0;

-	}

-	token = stbi__hdr_gettoken(s, buffer);

-	if (strncmp(token, "-Y ", 3)) {

-		stbi__rewind(s);

-		return 0;

-	}

-	token += 3;

-	*y = (int)strtol(token, &token, 10);

-	while (*token == ' ')

-		++token;

-	if (strncmp(token, "+X ", 3)) {

-		stbi__rewind(s);

-		return 0;

-	}

-	token += 3;

-	*x = (int)strtol(token, NULL, 10);

-	*comp = 3;

-	return 1;

-}

-#endif // STBI_NO_HDR

-#ifndef STBI_NO_BMP

-static int stbi__bmp_info(stbi__context* s, int* x, int* y, int* comp) {

-	void* p;

-	stbi__bmp_data info;

-	info.all_a = 255;

-	p = stbi__bmp_parse_header(s, &info);

-	stbi__rewind(s);

-	if (p == NULL)

-		return 0;

-	if (x)

-		*x = s->img_x;

-	if (y)

-		*y = s->img_y;

-	if (comp) {

-		if (info.bpp == 24 && info.ma == 0xff000000)

-			*comp = 3;

-		else

-			*comp = info.ma ? 4 : 3;

-	}

-	return 1;

-}

-#endif

-#ifndef STBI_NO_PSD

-static int stbi__psd_info(stbi__context* s, int* x, int* y, int* comp) {

-	int channelCount, dummy, depth;

-	if (!x)

-		x = &dummy;

-	if (!y)

-		y = &dummy;

-	if (!comp)

-		comp = &dummy;

-	if (stbi__get32be(s) != 0x38425053) {

-		stbi__rewind(s);

-		return 0;

-	}

-	if (stbi__get16be(s) != 1) {

-		stbi__rewind(s);

-		return 0;

-	}

-	stbi__skip(s, 6);

-	channelCount = stbi__get16be(s);

-	if (channelCount < 0 || channelCount > 16) {

-		stbi__rewind(s);

-		return 0;

-	}

-	*y = stbi__get32be(s);

-	*x = stbi__get32be(s);

-	depth = stbi__get16be(s);

-	if (depth != 8 && depth != 16) {

-		stbi__rewind(s);

-		return 0;

-	}

-	if (stbi__get16be(s) != 3) {

-		stbi__rewind(s);

-		return 0;

-	}

-	*comp = 4;

-	return 1;

-}

-static int stbi__psd_is16(stbi__context* s) {

-	int channelCount, depth;

-	if (stbi__get32be(s) != 0x38425053) {

-		stbi__rewind(s);

-		return 0;

-	}

-	if (stbi__get16be(s) != 1) {

-		stbi__rewind(s);

-		return 0;

-	}

-	stbi__skip(s, 6);

-	channelCount = stbi__get16be(s);

-	if (channelCount < 0 || channelCount > 16) {

-		stbi__rewind(s);

-		return 0;

-	}

-	(void)stbi__get32be(s);

-	(void)stbi__get32be(s);

-	depth = stbi__get16be(s);

-	if (depth != 16) {

-		stbi__rewind(s);

-		return 0;

-	}

-	return 1;

-}

-#endif

-#ifndef STBI_NO_PIC

-static int stbi__pic_info(stbi__context* s, int* x, int* y, int* comp) {

-	int act_comp = 0, num_packets = 0, chained, dummy;

-	stbi__pic_packet packets[10];

-	if (!x)

-		x = &dummy;

-	if (!y)

-		y = &dummy;

-	if (!comp)

-		comp = &dummy;

-	if (!stbi__pic_is4(s, "\x53\x80\xF6\x34")) {

-		stbi__rewind(s);

-		return 0;

-	}

-	stbi__skip(s, 88);

-	*x = stbi__get16be(s);

-	*y = stbi__get16be(s);

-	if (stbi__at_eof(s)) {

-		stbi__rewind(s);

-		return 0;

-	}

-	if ((*x) != 0 && (1 << 28) / (*x) < (*y)) {

-		stbi__rewind(s);

-		return 0;

-	}

-	stbi__skip(s, 8);

-	do {

-		stbi__pic_packet* packet;

-		if (num_packets == sizeof(packets) / sizeof(packets[0]))

-			return 0;

-		packet = &packets[num_packets++];

-		chained = stbi__get8(s);

-		packet->size = stbi__get8(s);

-		packet->type = stbi__get8(s);

-		packet->channel = stbi__get8(s);

-		act_comp |= packet->channel;

-		if (stbi__at_eof(s)) {

-			stbi__rewind(s);

-			return 0;

-		}

-		if (packet->size != 8) {

-			stbi__rewind(s);

-			return 0;

-		}

-	} while (chained);

-	*comp = (act_comp & 0x10 ? 4 : 3);

-	return 1;

-}

-#endif

-// *************************************************************************************************

-// Portable Gray Map and Portable Pixel Map loader

-// by Ken Miller

-//

-// PGM: http://netpbm.sourceforge.net/doc/pgm.html

-// PPM: http://netpbm.sourceforge.net/doc/ppm.html

-//

-// Known limitations:

-//    Does not support comments in the header section

-//    Does not support ASCII image data (formats P2 and P3)

-//    Does not support 16-bit-per-channel

-#ifndef STBI_NO_PNM

-static int stbi__pnm_test(stbi__context* s) {

-	char p, t;

-	p = (char)stbi__get8(s);

-	t = (char)stbi__get8(s);

-	if (p != 'P' || (t != '5' && t != '6')) {

-		stbi__rewind(s);

-		return 0;

-	}

-	return 1;

-}

-static void* stbi__pnm_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri) {

-	stbi_uc* out;

-	STBI_NOTUSED(ri);

-	if (!stbi__pnm_info(s, (int*)&s->img_x, (int*)&s->img_y, (int*)&s->img_n))

-		return 0;

-	*x = s->img_x;

-	*y = s->img_y;

-	if (comp)

-		*comp = s->img_n;

-	if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0))

-		return stbi__errpuc("too large", "PNM too large");

-	out = (stbi_uc*)stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0);

-	if (!out)

-		return stbi__errpuc("outofmem", "Out of memory");

-	stbi__getn(s, out, s->img_n * s->img_x * s->img_y);

-	if (req_comp && req_comp != s->img_n) {

-		out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);

-		if (out == NULL)

-			return out; // stbi__convert_format frees input on failure

-	}

-	return out;

-}

-static int stbi__pnm_isspace(char c) { return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'; }

-static void stbi__pnm_skip_whitespace(stbi__context* s, char* c) {

-	for (;;) {

-		while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))

-			*c = (char)stbi__get8(s);

-		if (stbi__at_eof(s) || *c != '#')

-			break;

-		while (!stbi__at_eof(s) && *c != '\n' && *c != '\r')

-			*c = (char)stbi__get8(s);

-	}

-}

-static int stbi__pnm_isdigit(char c) { return c >= '0' && c <= '9'; }

-static int stbi__pnm_getinteger(stbi__context* s, char* c) {

-	int value = 0;

-	while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {

-		value = value * 10 + (*c - '0');

-		*c = (char)stbi__get8(s);

-	}

-	return value;

-}

-static int stbi__pnm_info(stbi__context* s, int* x, int* y, int* comp) {

-	int maxv, dummy;

-	char c, p, t;

-	if (!x)

-		x = &dummy;

-	if (!y)

-		y = &dummy;

-	if (!comp)

-		comp = &dummy;

-	stbi__rewind(s);

-	// Get identifier

-	p = (char)stbi__get8(s);

-	t = (char)stbi__get8(s);

-	if (p != 'P' || (t != '5' && t != '6')) {

-		stbi__rewind(s);

-		return 0;

-	}

-	*comp = (t == '6') ? 3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm

-	c = (char)stbi__get8(s);

-	stbi__pnm_skip_whitespace(s, &c);

-	*x = stbi__pnm_getinteger(s, &c); // read width

-	stbi__pnm_skip_whitespace(s, &c);

-	*y = stbi__pnm_getinteger(s, &c); // read height

-	stbi__pnm_skip_whitespace(s, &c);

-	maxv = stbi__pnm_getinteger(s, &c); // read max value

-	if (maxv > 255)

-		return stbi__err("max value > 255", "PPM image not 8-bit");

-	else

-		return 1;

-}

-#endif

-static int stbi__info_main(stbi__context* s, int* x, int* y, int* comp) {

-#ifndef STBI_NO_JPEG

-	if (stbi__jpeg_info(s, x, y, comp))

-		return 1;

-#endif

-#ifndef STBI_NO_PNG

-	if (stbi__png_info(s, x, y, comp))

-		return 1;

-#endif

-#ifndef STBI_NO_GIF

-	if (stbi__gif_info(s, x, y, comp))

-		return 1;

-#endif

-#ifndef STBI_NO_BMP

-	if (stbi__bmp_info(s, x, y, comp))

-		return 1;

-#endif

-#ifndef STBI_NO_PSD

-	if (stbi__psd_info(s, x, y, comp))

-		return 1;

-#endif

-#ifndef STBI_NO_PIC

-	if (stbi__pic_info(s, x, y, comp))

-		return 1;

-#endif

-#ifndef STBI_NO_PNM

-	if (stbi__pnm_info(s, x, y, comp))

-		return 1;

-#endif

-#ifndef STBI_NO_HDR

-	if (stbi__hdr_info(s, x, y, comp))

-		return 1;

-#endif

-// test tga last because it's a crappy test!

-#ifndef STBI_NO_TGA

-	if (stbi__tga_info(s, x, y, comp))

-		return 1;

-#endif

-	return stbi__err("unknown image type", "Image not of any known type, or corrupt");

-}

-static int stbi__is_16_main(stbi__context* s) {

-#ifndef STBI_NO_PNG

-	if (stbi__png_is16(s))

-		return 1;

-#endif

-#ifndef STBI_NO_PSD

-	if (stbi__psd_is16(s))

-		return 1;

-#endif

-	return 0;

-}

-#ifndef STBI_NO_STDIO

-STBIDEF int stbi_info(char const* filename, int* x, int* y, int* comp) {

-	FILE* f = stbi__fopen(filename, "rb");

-	int result;

-	if (!f)

-		return stbi__err("can't fopen", "Unable to open file");

-	result = stbi_info_from_file(f, x, y, comp);

-	fclose(f);

-	return result;

-}

-STBIDEF int stbi_info_from_file(FILE* f, int* x, int* y, int* comp) {

-	int r;

-	stbi__context s;

-	long pos = ftell(f);

-	stbi__start_file(&s, f);

-	r = stbi__info_main(&s, x, y, comp);

-	fseek(f, pos, SEEK_SET);

-	return r;

-}

-STBIDEF int stbi_is_16_bit(char const* filename) {

-	FILE* f = stbi__fopen(filename, "rb");

-	int result;

-	if (!f)

-		return stbi__err("can't fopen", "Unable to open file");

-	result = stbi_is_16_bit_from_file(f);

-	fclose(f);

-	return result;

-}

-STBIDEF int stbi_is_16_bit_from_file(FILE* f) {

-	int r;

-	stbi__context s;

-	long pos = ftell(f);

-	stbi__start_file(&s, f);

-	r = stbi__is_16_main(&s);

-	fseek(f, pos, SEEK_SET);

-	return r;

-}

-#endif // !STBI_NO_STDIO

-STBIDEF int stbi_info_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* comp) {

-	stbi__context s;

-	stbi__start_mem(&s, buffer, len);

-	return stbi__info_main(&s, x, y, comp);

-}

-STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const* c, void* user, int* x, int* y, int* comp) {

-	stbi__context s;

-	stbi__start_callbacks(&s, (stbi_io_callbacks*)c, user);

-	return stbi__info_main(&s, x, y, comp);

-}

-STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const* buffer, int len) {

-	stbi__context s;

-	stbi__start_mem(&s, buffer, len);

-	return stbi__is_16_main(&s);

-}

-STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const* c, void* user) {

-	stbi__context s;

-	stbi__start_callbacks(&s, (stbi_io_callbacks*)c, user);

-	return stbi__is_16_main(&s);

-}

-#endif // STB_IMAGE_IMPLEMENTATION

-/*

-   revision history:

-	  2.20  (2019-02-07) support utf8 filenames in Windows; fix warnings and

-   platform ifdefs 2.19  (2018-02-11) fix warning 2.18  (2018-01-30) fix

-   warnings 2.17  (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug

-						 1-bit BMP

-						 *_is_16_bit api

-						 avoid warnings

-	  2.16  (2017-07-23) all functions have 16-bit variants;

-						 STBI_NO_STDIO works again;

-						 compilation fixes;

-						 fix rounding in unpremultiply;

-						 optimize vertical flip;

-						 disable raw_len validation;

-						 documentation fixes

-	  2.15  (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;

-						 warning fixes; disable run-time SSE detection on gcc;

-						 uniform handling of optional "return" values;

-						 thread-safe initialization of zlib tables

-	  2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet

-   JPGs 2.13  (2016-11-29) add 16-bit API, only supported for PNG right now 2.12

-   (2016-04-02) fix typo in 2.11 PSD fix that caused crashes 2.11  (2016-04-02)

-   allocate large structures on the stack remove white matting for transparent

-   PSD fix reported channel count for PNG & BMP re-enable SSE2 in non-gcc 64-bit

-						 support RGB-formatted JPEG

-						 read 16-bit PNGs (only as 8-bit)

-	  2.10  (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED

-	  2.09  (2016-01-16) allow comments in PNM files

-						 16-bit-per-pixel TGA (not bit-per-component)

-						 info() for TGA could break due to .hdr handling

-						 info() for BMP to shares code instead of sloppy parse

-						 can use STBI_REALLOC_SIZED if allocator doesn't support

-   realloc code cleanup 2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD

-   as RGBA 2.07  (2015-09-13) fix compiler warnings partial animated GIF support

-						 limited 16-bpc PSD support

-						 #ifdef unused functions

-						 bug with < 92 byte PIC,PNM,HDR,TGA

-	  2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value

-	  2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning

-	  2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit

-	  2.03  (2015-04-12) extra corruption checking (mmozeiko)

-						 stbi_set_flip_vertically_on_load (nguillemot)

-						 fix NEON support; fix mingw support

-	  2.02  (2015-01-19) fix incorrect assert, fix warning

-	  2.01  (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit

-   without -msse2 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG 2.00

-   (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg) progressive

-   JPEG (stb) PGM/PPM support (Ken Miller) STBI_MALLOC,STBI_REALLOC,STBI_FREE

-						 GIF bugfix -- seemingly never worked

-						 STBI_NO_*, STBI_ONLY_*

-	  1.48  (2014-12-14) fix incorrectly-named assert()

-	  1.47  (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar

-   Cornut & stb) optimize PNG (ryg) fix bug in interlaced PNG with

-   user-specified channel count (stb) 1.46  (2014-08-26) fix broken tRNS chunk

-   (colorkey-style transparency) in non-paletted PNG 1.45  (2014-08-16) fix

-   MSVC-ARM internal compiler error by wrapping malloc 1.44  (2014-08-07)

-			  various warning fixes from Ronny Chevalier

-	  1.43  (2014-07-15)

-			  fix MSVC-only compiler problem in code changed in 1.42

-	  1.42  (2014-07-09)

-			  don't define _CRT_SECURE_NO_WARNINGS (affects user code)

-			  fixes to stbi__cleanup_jpeg path

-			  added STBI_ASSERT to avoid requiring assert.h

-	  1.41  (2014-06-25)

-			  fix search&replace from 1.36 that messed up comments/error

-   messages 1.40  (2014-06-22) fix gcc struct-initialization warning 1.39

-   (2014-06-15) fix to TGA optimization when req_comp != number of components in

-   TGA; fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my

-   test suite) add support for BMP version 5 (more ignored fields) 1.38

-   (2014-06-06) suppress MSVC warnings on integer casts truncating values fix

-   accidental rename of 'skip' field of I/O 1.37  (2014-06-04) remove duplicate

-   typedef 1.36  (2014-06-03) convert to header file single-file library if

-   de-iphone isn't set, load iphone images color-swapped instead of returning

-   NULL 1.35  (2014-05-27) various warnings fix broken STBI_SIMD path fix bug

-   where stbi_load_from_file no longer left file pointer in correct place fix

-   broken non-easy path for 32-bit BMP (possibly never used) TGA optimization by

-   Arseny Kapoulkine 1.34  (unknown) use STBI_NOTUSED in

-   stbi__resample_row_generic(), fix one more leak in tga failure case 1.33

-   (2011-07-14) make stbi_is_hdr work in STBI_NO_HDR (as specified), minor

-   compiler-friendly improvements 1.32  (2011-07-13) support for "info" function

-   for all supported filetypes (SpartanJ) 1.31  (2011-06-20) a few more leak

-   fixes, bug in PNG handling (SpartanJ) 1.30  (2011-06-11) added ability to

-   load files via callbacks to accomidate custom input streams (Ben Wenger)

-			  removed deprecated format-specific test/load functions

-			  removed support for installable file formats (stbi_loader) --

-   would have been broken for IO callbacks anyway error cases in bmp and tga

-   give messages and don't leak (Raymond Barbiero, grisha) fix inefficiency in

-   decoding 32-bit BMP (David Woo) 1.29  (2010-08-16) various warning fixes from

-   Aurelien Pocheville 1.28  (2010-08-01) fix bug in GIF palette transparency

-   (SpartanJ) 1.27  (2010-08-01) cast-to-stbi_uc to fix warnings 1.26

-   (2010-07-24) fix bug in file buffering for PNG reported by SpartanJ 1.25

-   (2010-07-17) refix trans_data warning (Won Chun) 1.24  (2010-07-12) perf

-   improvements reading from files on platforms with lock-heavy fgetc() minor

-   perf improvements for jpeg deprecated type-specific functions so we'll get

-   feedback if they're needed attempt to fix trans_data warning (Won Chun) 1.23

-   fixed bug in iPhone support 1.22  (2010-07-10) removed image *writing*

-   support stbi_info support from Jetro Lauha GIF support from Jean-Marc Lienher

-			  iPhone PNG-extensions from James Brown

-			  warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err.

-   Janez (U+017D)emva) 1.21    fix use of 'stbi_uc' in header (reported by jon

-   blow) 1.20    added support for Softimage PIC, by Tom Seddon 1.19    bug in

-   interlaced PNG corruption check (found by ryg) 1.18  (2008-08-02) fix a

-   threading bug (local mutable static) 1.17    support interlaced PNG 1.16

-   major bugfix - stbi__convert_format converted one too many pixels 1.15

-   initialize some fields for thread safety 1.14    fix threadsafe conversion

-   bug header-file-only version (#define STBI_HEADER_FILE_ONLY before including)

-	  1.13    threadsafe

-	  1.12    const qualifiers in the API

-	  1.11    Support installable IDCT, colorspace conversion routines

-	  1.10    Fixes for 64-bit (don't use "unsigned long")

-			  optimized upsampling by Fabian "ryg" Giesen

-	  1.09    Fix format-conversion for PSD code (bad global variables!)

-	  1.08    Thatcher Ulrich's PSD code integrated by Nicolas Schulz

-	  1.07    attempt to fix C++ warning/errors again

-	  1.06    attempt to fix C++ warning/errors again

-	  1.05    fix TGA loading to return correct *comp and use good luminance

-   calc 1.04    default float alpha is 1, not 255; use 'void *' for

-   stbi_image_free 1.03    bugfixes to STBI_NO_STDIO, STBI_NO_HDR 1.02 support

-   for (subset of) HDR files, float interface for preferred access to them 1.01

-   fix bug: possible bug in handling right-side up bmps... not sure fix bug: the

-   stbi__bmp_load() and stbi__tga_load() functions didn't work at all 1.00

-   interface to zlib that skips zlib header 0.99    correct handling of alpha in

-   palette 0.98    TGA loader by lonesock; dynamically add loaders (untested)

-	  0.97    jpeg errors on too large a file; also catch another malloc failure

-	  0.96    fix detection of invalid v value - particleman@mollyrocket forum

-	  0.95    during header scan, seek to markers in case of padding

-	  0.94    STBI_NO_STDIO to disable stdio usage; rename all #defines the same

-	  0.93    handle jpegtran output; verbose errors

-	  0.92    read 4,8,16,24,32-bit BMP files of several formats

-	  0.91    output 24-bit Windows 3.0 BMP files

-	  0.90    fix a few more warnings; bump version number to approach 1.0

-	  0.61    bugfixes due to Marc LeBlanc, Christopher Lloyd

-	  0.60    fix compiling as c++

-	  0.59    fix warnings: merge Dave Moore's -Wall fixes

-	  0.58    fix bug: zlib uncompressed mode len/nlen was wrong endian

-	  0.57    fix bug: jpg last huffman symbol before marker was >9 bits but

-   less than 16 available 0.56    fix bug: zlib uncompressed mode len vs. nlen

-	  0.55    fix bug: restart_interval not initialized to 0

-	  0.54    allow NULL for 'int *comp'

-	  0.53    fix bug in png 3->4; speedup png decoding

-	  0.52    png handles req_comp=3,4 directly; minor cleanup; jpeg comments

-	  0.51    obey req_comp requests, 1-component jpegs return as 1-component,

-			  on 'test' only check type, not whether we support this variant

-	  0.50  (2006-11-19)

-			  first released version

-*/

-/*

-------------------------------------------------------------------------------

-This software is available under 2 licenses -- choose whichever you prefer.

-------------------------------------------------------------------------------

-ALTERNATIVE A - MIT License

-Copyright (c) 2017 Sean Barrett

-Permission is hereby granted, free of charge, to any person obtaining a copy of

-this software and associated documentation files (the "Software"), to deal in

-the Software without restriction, including without limitation the rights to

-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies

-of the Software, and to permit persons to whom the Software is furnished to do

-so, subject to the following conditions:

-The above copyright notice and this permission notice shall be included in all

-copies or substantial portions of the Software.

-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

-SOFTWARE.

-------------------------------------------------------------------------------

-ALTERNATIVE B - Public Domain (www.unlicense.org)

-This is free and unencumbered software released into the public domain.

-Anyone is free to copy, modify, publish, use, compile, sell, or distribute this

-software, either in source code form or as a compiled binary, for any purpose,

-commercial or non-commercial, and by any means.

-In jurisdictions that recognize copyright laws, the author or authors of this

-software dedicate any and all copyright interest in the software to the public

-domain. We make this dedication for the benefit of the public at large and to

-the detriment of our heirs and successors. We intend this dedication to be an

-overt act of relinquishment in perpetuity of all present and future rights to

-this software under copyright law.

-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

-AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN

-ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION

-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

-------------------------------------------------------------------------------

-*/

--- a/SDL_Examples/include/stb_image_write.h

+++ /dev/null

@@ -1,1733 +1,0 @@

-/* stb_image_write - v1.14 - public domain - http://nothings.org/stb

-   writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015

-									 no warranty implied; use at your own risk

-   Before #including,

-	   #define STB_IMAGE_WRITE_IMPLEMENTATION

-   in the file that you want to have the implementation.

-   Will probably not work correctly with strict-aliasing optimizations.

-ABOUT:

-   This header file is a library for writing images to C stdio or a callback.

-   The PNG output is not optimal; it is 20-50% larger than the file

-   written by a decent optimizing implementation; though providing a custom

-   zlib compress function (see STBIW_ZLIB_COMPRESS) can mitigate that.

-   This library is designed for source code compactness and simplicity,

-   not optimal image file size or run-time performance.

-BUILDING:

-   You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h.

-   You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace

-   malloc,realloc,free.

-   You can #define STBIW_MEMMOVE() to replace memmove()

-   You can #define STBIW_ZLIB_COMPRESS to use a custom zlib-style compress

-function for PNG compression (instead of the builtin one), it must have the

-following signature: unsigned char * my_compress(unsigned char *data, int

-data_len, int *out_len, int quality); The returned data will be freed with

-STBIW_FREE() (free() by default), so it must be heap allocated with

-STBIW_MALLOC() (malloc() by default),

-UNICODE:

-   If compiling for Windows and you wish to use Unicode filenames, compile

-   with

-	   #define STBIW_WINDOWS_UTF8

-   and pass utf8-encoded filenames. Call stbiw_convert_wchar_to_utf8 to convert

-   Windows wchar_t filenames to utf8.

-USAGE:

-   There are five functions, one for each image file format:

-	 int stbi_write_png(char const *filename, int w, int h, int comp, const void

-*data, int stride_in_bytes); int stbi_write_bmp(char const *filename, int w, int

-h, int comp, const void *data); int stbi_write_tga(char const *filename, int w,

-int h, int comp, const void *data); int stbi_write_jpg(char const *filename, int

-w, int h, int comp, const void *data, int quality); int stbi_write_hdr(char

-const *filename, int w, int h, int comp, const float *data);

-	 void stbi_flip_vertically_on_write(int flag); // flag is non-zero to flip

-data vertically

-   There are also five equivalent functions that use an arbitrary write

-function. You are expected to open/close your file-equivalent before and after

-calling these:

-	 int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int

-h, int comp, const void  *data, int stride_in_bytes); int

-stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int

-comp, const void  *data); int stbi_write_tga_to_func(stbi_write_func *func, void

-*context, int w, int h, int comp, const void  *data); int

-stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int

-comp, const float *data); int stbi_write_jpg_to_func(stbi_write_func *func, void

-*context, int x, int y, int comp, const void *data, int quality);

-   where the callback is:

-	  void stbi_write_func(void *context, void *data, int size);

-   You can configure it with these global variables:

-	  int stbi_write_tga_with_rle;             // defaults to true; set to 0 to

-disable RLE int stbi_write_png_compression_level;    // defaults to 8; set to

-higher for more compression int stbi_write_force_png_filter;         // defaults

-to -1; set to 0..5 to force a filter mode

-   You can define STBI_WRITE_NO_STDIO to disable the file variant of these

-   functions, so the library will not use stdio.h at all. However, this will

-   also disable HDR writing, because it requires stdio for formatted output.

-   Each function returns 0 on failure and non-0 on success.

-   The functions create an image file defined by the parameters. The image

-   is a rectangle of pixels stored from left-to-right, top-to-bottom.

-   Each pixel contains 'comp' channels of data stored interleaved with 8-bits

-   per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is

-   monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall.

-   The *data pointer points to the first byte of the top-left-most pixel.

-   For PNG, "stride_in_bytes" is the distance in bytes from the first byte of

-   a row of pixels to the first byte of the next row of pixels.

-   PNG creates output files with the same number of components as the input.

-   The BMP format expands Y to RGB in the file format and does not

-   output alpha.

-   PNG supports writing rectangles of data even when the bytes storing rows of

-   data are not consecutive in memory (e.g. sub-rectangles of a larger image),

-   by supplying the stride between the beginning of adjacent rows. The other

-   formats do not. (Thus you cannot write a native-format BMP through the BMP

-   writer, both because it is in BGR order and because it may have padding

-   at the end of the line.)

-   PNG allows you to set the deflate compression level by setting the global

-   variable 'stbi_write_png_compression_level' (it defaults to 8).

-   HDR expects linear float data. Since the format is always 32-bit rgb(e)

-   data, alpha (if provided) is discarded, and for monochrome data it is

-   replicated across all three channels.

-   TGA supports RLE or non-RLE compressed data. To use non-RLE-compressed

-   data, set the global variable 'stbi_write_tga_with_rle' to 0.

-   JPEG does ignore alpha channels in input data; quality is between 1 and 100.

-   Higher quality looks better but results in a bigger image.

-   JPEG baseline (no JPEG progressive).

-CREDITS:

-   Sean Barrett           -    PNG/BMP/TGA

-   Baldur Karlsson        -    HDR

-   Jean-Sebastien Guay    -    TGA monochrome

-   Tim Kelsey             -    misc enhancements

-   Alan Hickman           -    TGA RLE

-   Emmanuel Julien        -    initial file IO callback implementation

-   Jon Olick              -    original jo_jpeg.cpp code

-   Daniel Gibson          -    integrate JPEG, allow external zlib

-   Aarni Koskela          -    allow choosing PNG filter

-   bugfixes:

-	  github:Chribba

-	  Guillaume Chereau

-	  github:jry2

-	  github:romigrou

-	  Sergio Gonzalez

-	  Jonas Karlsson

-	  Filip Wasil

-	  Thatcher Ulrich

-	  github:poppolopoppo

-	  Patrick Boettcher

-	  github:xeekworx

-	  Cap Petschulat

-	  Simon Rodriguez

-	  Ivan Tikhonov

-	  github:ignotion

-	  Adam Schackart

-LICENSE

-  See end of file for license information.

-*/

-#ifndef INCLUDE_STB_IMAGE_WRITE_H

-#define INCLUDE_STB_IMAGE_WRITE_H

-#include <stdlib.h>

-// if STB_IMAGE_WRITE_STATIC causes problems, try defining STBIWDEF to 'inline'

-// or 'static inline'

-#ifndef STBIWDEF

-#ifdef STB_IMAGE_WRITE_STATIC

-#define STBIWDEF static

-#else

-#ifdef __cplusplus

-#define STBIWDEF extern "C"

-#else

-#define STBIWDEF extern

-#endif

-#endif

-#endif

-#ifndef STB_IMAGE_WRITE_STATIC // C++ forbids static forward declarations

-extern int stbi_write_tga_with_rle;

-extern int stbi_write_png_compression_level;

-extern int stbi_write_force_png_filter;

-#endif

-#ifndef STBI_WRITE_NO_STDIO

-STBIWDEF int stbi_write_png(char const* filename, int w, int h, int comp, const void* data, int stride_in_bytes);

-STBIWDEF int stbi_write_bmp(char const* filename, int w, int h, int comp, const void* data);

-STBIWDEF int stbi_write_tga(char const* filename, int w, int h, int comp, const void* data);

-STBIWDEF int stbi_write_hdr(char const* filename, int w, int h, int comp, const float* data);

-STBIWDEF int stbi_write_jpg(char const* filename, int x, int y, int comp, const void* data, int quality);

-#ifdef STBI_WINDOWS_UTF8

-STBIWDEF int stbiw_convert_wchar_to_utf8(char* buffer, size_t bufferlen, const wchar_t* input);

-#endif

-#endif

-typedef void stbi_write_func(void* context, void* data, int size);

-STBIWDEF int stbi_write_png_to_func(stbi_write_func* func, void* context, int w, int h, int comp, const void* data, int stride_in_bytes);

-STBIWDEF int stbi_write_bmp_to_func(stbi_write_func* func, void* context, int w, int h, int comp, const void* data);

-STBIWDEF int stbi_write_tga_to_func(stbi_write_func* func, void* context, int w, int h, int comp, const void* data);

-STBIWDEF int stbi_write_hdr_to_func(stbi_write_func* func, void* context, int w, int h, int comp, const float* data);

-STBIWDEF int stbi_write_jpg_to_func(stbi_write_func* func, void* context, int x, int y, int comp, const void* data, int quality);

-STBIWDEF void stbi_flip_vertically_on_write(int flip_boolean);

-#endif // INCLUDE_STB_IMAGE_WRITE_H

-#ifdef STB_IMAGE_WRITE_IMPLEMENTATION

-#ifdef _WIN32

-#ifndef _CRT_SECURE_NO_WARNINGS

-#define _CRT_SECURE_NO_WARNINGS

-#endif

-#ifndef _CRT_NONSTDC_NO_DEPRECATE

-#define _CRT_NONSTDC_NO_DEPRECATE

-#endif

-#endif

-#ifndef STBI_WRITE_NO_STDIO

-#include <stdio.h>

-#endif // STBI_WRITE_NO_STDIO

-#include <math.h>

-#include <stdarg.h>

-#include <stdlib.h>

-#include <string.h>

-#if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || defined(STBIW_REALLOC_SIZED))

-// ok

-#elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) && !defined(STBIW_REALLOC_SIZED)

-// ok

-#else

-#error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC (or STBIW_REALLOC_SIZED)."

-#endif

-#ifndef STBIW_MALLOC

-#define STBIW_MALLOC(sz) malloc(sz)

-#define STBIW_REALLOC(p, newsz) realloc(p, newsz)

-#define STBIW_FREE(p) free(p)

-#endif

-#ifndef STBIW_REALLOC_SIZED

-#define STBIW_REALLOC_SIZED(p, oldsz, newsz) STBIW_REALLOC(p, newsz)

-#endif

-#ifndef STBIW_MEMMOVE

-#define STBIW_MEMMOVE(a, b, sz) memmove(a, b, sz)

-#endif

-#ifndef STBIW_ASSERT

-#include <assert.h>

-#define STBIW_ASSERT(x) assert(x)

-#endif

-#define STBIW_UCHAR(x) (unsigned char)((x)&0xff)

-#ifdef STB_IMAGE_WRITE_STATIC

-static int stbi_write_png_compression_level = 8;

-static int stbi_write_tga_with_rle = 1;

-static int stbi_write_force_png_filter = -1;

-#else

-int stbi_write_png_compression_level = 8;

-int stbi_write_tga_with_rle = 1;

-int stbi_write_force_png_filter = -1;

-#endif

-static int stbi__flip_vertically_on_write = 0;

-STBIWDEF void stbi_flip_vertically_on_write(int flag) { stbi__flip_vertically_on_write = flag; }

-typedef struct {

-	stbi_write_func* func;

-	void* context;

-} stbi__write_context;

-// initialize a callback-based context

-static void stbi__start_write_callbacks(stbi__write_context* s, stbi_write_func* c, void* context) {

-	s->func = c;

-	s->context = context;

-}

-#ifndef STBI_WRITE_NO_STDIO

-static void stbi__stdio_write(void* context, void* data, int size) { fwrite(data, 1, size, (FILE*)context); }

-#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)

-#ifdef __cplusplus

-#define STBIW_EXTERN extern "C"

-#else

-#define STBIW_EXTERN extern

-#endif

-STBIW_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char* str, int cbmb, wchar_t* widestr,

-																	 int cchwide);

-STBIW_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t* widestr, int cchwide, char* str,

-																	 int cbmb, const char* defchar, int* used_default);

-STBIWDEF int stbiw_convert_wchar_to_utf8(char* buffer, size_t bufferlen, const wchar_t* input) {

-	return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int)bufferlen, NULL, NULL);

-}

-#endif

-static FILE* stbiw__fopen(char const* filename, char const* mode) {

-	FILE* f;

-#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)

-	wchar_t wMode[64];

-	wchar_t wFilename[1024];

-	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)))

-		return 0;

-	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)))

-		return 0;

-#if _MSC_VER >= 1400

-	if (0 != _wfopen_s(&f, wFilename, wMode))

-		f = 0;

-#else

-	f = _wfopen(wFilename, wMode);

-#endif

-#elif defined(_MSC_VER) && _MSC_VER >= 1400

-	if (0 != fopen_s(&f, filename, mode))

-		f = 0;

-#else

-	f = fopen(filename, mode);

-#endif

-	return f;

-}

-static int stbi__start_write_file(stbi__write_context* s, const char* filename) {

-	FILE* f = stbiw__fopen(filename, "wb");

-	stbi__start_write_callbacks(s, stbi__stdio_write, (void*)f);

-	return f != NULL;

-}

-static void stbi__end_write_file(stbi__write_context* s) { fclose((FILE*)s->context); }

-#endif // !STBI_WRITE_NO_STDIO

-typedef unsigned int stbiw_uint32;

-typedef int stb_image_write_test[sizeof(stbiw_uint32) == 4 ? 1 : -1];

-static void stbiw__writefv(stbi__write_context* s, const char* fmt, va_list v) {

-	while (*fmt) {

-		switch (*fmt++) {

-		case ' ':

-			break;

-		case '1': {

-			unsigned char x = STBIW_UCHAR(va_arg(v, int));

-			s->func(s->context, &x, 1);

-			break;

-		}

-		case '2': {

-			int x = va_arg(v, int);

-			unsigned char b[2];

-			b[0] = STBIW_UCHAR(x);

-			b[1] = STBIW_UCHAR(x >> 8);

-			s->func(s->context, b, 2);

-			break;

-		}

-		case '4': {

-			stbiw_uint32 x = va_arg(v, int);

-			unsigned char b[4];

-			b[0] = STBIW_UCHAR(x);

-			b[1] = STBIW_UCHAR(x >> 8);

-			b[2] = STBIW_UCHAR(x >> 16);

-			b[3] = STBIW_UCHAR(x >> 24);

-			s->func(s->context, b, 4);

-			break;

-		}

-		default:

-			STBIW_ASSERT(0);

-			return;

-		}

-	}

-}

-static void stbiw__writef(stbi__write_context* s, const char* fmt, ...) {

-	va_list v;

-	va_start(v, fmt);

-	stbiw__writefv(s, fmt, v);

-	va_end(v);

-}

-static void stbiw__putc(stbi__write_context* s, unsigned char c) { s->func(s->context, &c, 1); }

-static void stbiw__write3(stbi__write_context* s, unsigned char a, unsigned char b, unsigned char c) {

-	unsigned char arr[3];

-	arr[0] = a;

-	arr[1] = b;

-	arr[2] = c;

-	s->func(s->context, arr, 3);

-}

-static void stbiw__write_pixel(stbi__write_context* s, int rgb_dir, int comp, int write_alpha, int expand_mono, unsigned char* d) {

-	unsigned char bg[3] = {255, 0, 255}, px[3];

-	int k;

-	if (write_alpha < 0)

-		s->func(s->context, &d[comp - 1], 1);

-	switch (comp) {

-	case 2: // 2 pixels = mono + alpha, alpha is written separately, so same as

-			// 1-channel case

-	case 1:

-		if (expand_mono)

-			stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp

-		else

-			s->func(s->context, d, 1); // monochrome TGA

-		break;

-	case 4:

-		if (!write_alpha) {

-			// composite against pink background

-			for (k = 0; k < 3; ++k)

-				px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255;

-			stbiw__write3(s, px[1 - rgb_dir], px[1], px[1 + rgb_dir]);

-			break;

-		}

-		/* FALLTHROUGH */

-	case 3:

-		stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]);

-		break;

-	}

-	if (write_alpha > 0)

-		s->func(s->context, &d[comp - 1], 1);

-}

-static void stbiw__write_pixels(stbi__write_context* s, int rgb_dir, int vdir, int x, int y, int comp, void* data, int write_alpha, int scanline_pad,

-								int expand_mono) {

-	stbiw_uint32 zero = 0;

-	int i, j, j_end;

-	if (y <= 0)

-		return;

-	if (stbi__flip_vertically_on_write)

-		vdir *= -1;

-	if (vdir < 0) {

-		j_end = -1;

-		j = y - 1;

-	} else {

-		j_end = y;

-		j = 0;

-	}

-	for (; j != j_end; j += vdir) {

-		for (i = 0; i < x; ++i) {

-			unsigned char* d = (unsigned char*)data + (j * x + i) * comp;

-			stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d);

-		}

-		s->func(s->context, &zero, scanline_pad);

-	}

-}

-static int stbiw__outfile(stbi__write_context* s, int rgb_dir, int vdir, int x, int y, int comp, int expand_mono, void* data, int alpha, int pad,

-						  const char* fmt, ...) {

-	if (y < 0 || x < 0) {

-		return 0;

-	} else {

-		va_list v;

-		va_start(v, fmt);

-		stbiw__writefv(s, fmt, v);

-		va_end(v);

-		stbiw__write_pixels(s, rgb_dir, vdir, x, y, comp, data, alpha, pad, expand_mono);

-		return 1;

-	}

-}

-static int stbi_write_bmp_core(stbi__write_context* s, int x, int y, int comp, const void* data) {

-	int pad = (-x * 3) & 3;

-	return stbiw__outfile(s, -1, -1, x, y, comp, 1, (void*)data, 0, pad,

-						  "11 4 22 4"

-						  "4 44 22 444444",

-						  'B', 'M', 14 + 40 + (x * 3 + pad) * y, 0, 0,

-						  14 + 40,							  // file header

-						  40, x, y, 1, 24, 0, 0, 0, 0, 0, 0); // bitmap header

-}

-STBIWDEF int stbi_write_bmp_to_func(stbi_write_func* func, void* context, int x, int y, int comp, const void* data) {

-	stbi__write_context s;

-	stbi__start_write_callbacks(&s, func, context);

-	return stbi_write_bmp_core(&s, x, y, comp, data);

-}

-#ifndef STBI_WRITE_NO_STDIO

-STBIWDEF int stbi_write_bmp(char const* filename, int x, int y, int comp, const void* data) {

-	stbi__write_context s;

-	if (stbi__start_write_file(&s, filename)) {

-		int r = stbi_write_bmp_core(&s, x, y, comp, data);

-		stbi__end_write_file(&s);

-		return r;

-	} else

-		return 0;

-}

-#endif //! STBI_WRITE_NO_STDIO

-static int stbi_write_tga_core(stbi__write_context* s, int x, int y, int comp, void* data) {

-	int has_alpha = (comp == 2 || comp == 4);

-	int colorbytes = has_alpha ? comp - 1 : comp;

-	int format = colorbytes < 2 ? 3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3

-	if (y < 0 || x < 0)

-		return 0;

-	if (!stbi_write_tga_with_rle) {

-		return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void*)data, has_alpha, 0, "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y,

-							  (colorbytes + has_alpha) * 8, has_alpha * 8);

-	} else {

-		int i, j, k;

-		int jend, jdir;

-		stbiw__writef(s, "111 221 2222 11", 0, 0, format + 8, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8);

-		if (stbi__flip_vertically_on_write) {

-			j = 0;

-			jend = y;

-			jdir = 1;

-		} else {

-			j = y - 1;

-			jend = -1;

-			jdir = -1;

-		}

-		for (; j != jend; j += jdir) {

-			unsigned char* row = (unsigned char*)data + j * x * comp;

-			int len;

-			for (i = 0; i < x; i += len) {

-				unsigned char* begin = row + i * comp;

-				int diff = 1;

-				len = 1;

-				if (i < x - 1) {

-					++len;

-					diff = memcmp(begin, row + (i + 1) * comp, comp);

-					if (diff) {

-						const unsigned char* prev = begin;

-						for (k = i + 2; k < x && len < 128; ++k) {

-							if (memcmp(prev, row + k * comp, comp)) {

-								prev += comp;

-								++len;

-							} else {

-								--len;

-								break;

-							}

-						}

-					} else {

-						for (k = i + 2; k < x && len < 128; ++k) {

-							if (!memcmp(begin, row + k * comp, comp)) {

-								++len;

-							} else {

-								break;

-							}

-						}

-					}

-				}

-				if (diff) {

-					unsigned char header = STBIW_UCHAR(len - 1);

-					s->func(s->context, &header, 1);

-					for (k = 0; k < len; ++k) {

-						stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp);

-					}

-				} else {

-					unsigned char header = STBIW_UCHAR(len - 129);

-					s->func(s->context, &header, 1);

-					stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin);

-				}

-			}

-		}

-	}

-	return 1;

-}

-STBIWDEF int stbi_write_tga_to_func(stbi_write_func* func, void* context, int x, int y, int comp, const void* data) {

-	stbi__write_context s;

-	stbi__start_write_callbacks(&s, func, context);

-	return stbi_write_tga_core(&s, x, y, comp, (void*)data);

-}

-#ifndef STBI_WRITE_NO_STDIO

-STBIWDEF int stbi_write_tga(char const* filename, int x, int y, int comp, const void* data) {

-	stbi__write_context s;

-	if (stbi__start_write_file(&s, filename)) {

-		int r = stbi_write_tga_core(&s, x, y, comp, (void*)data);

-		stbi__end_write_file(&s);

-		return r;

-	} else

-		return 0;

-}

-#endif

-// *************************************************************************************************

-// Radiance RGBE HDR writer

-// by Baldur Karlsson

-#define stbiw__max(a, b) ((a) > (b) ? (a) : (b))

-static void stbiw__linear_to_rgbe(unsigned char* rgbe, float* linear) {

-	int exponent;

-	float maxcomp = stbiw__max(linear[0], stbiw__max(linear[1], linear[2]));

-	if (maxcomp < 1e-32f) {

-		rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0;

-	} else {

-		float normalize = (float)frexp(maxcomp, &exponent) * 256.0f / maxcomp;

-		rgbe[0] = (unsigned char)(linear[0] * normalize);

-		rgbe[1] = (unsigned char)(linear[1] * normalize);

-		rgbe[2] = (unsigned char)(linear[2] * normalize);

-		rgbe[3] = (unsigned char)(exponent + 128);

-	}

-}

-static void stbiw__write_run_data(stbi__write_context* s, int length, unsigned char databyte) {

-	unsigned char lengthbyte = STBIW_UCHAR(length + 128);

-	STBIW_ASSERT(length + 128 <= 255);

-	s->func(s->context, &lengthbyte, 1);

-	s->func(s->context, &databyte, 1);

-}

-static void stbiw__write_dump_data(stbi__write_context* s, int length, unsigned char* data) {

-	unsigned char lengthbyte = STBIW_UCHAR(length);

-	STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code

-	s->func(s->context, &lengthbyte, 1);

-	s->func(s->context, data, length);

-}

-static void stbiw__write_hdr_scanline(stbi__write_context* s, int width, int ncomp, unsigned char* scratch, float* scanline) {

-	unsigned char scanlineheader[4] = {2, 2, 0, 0};

-	unsigned char rgbe[4];

-	float linear[3];

-	int x;

-	scanlineheader[2] = (width & 0xff00) >> 8;

-	scanlineheader[3] = (width & 0x00ff);

-	/* skip RLE for images too small or large */

-	if (width < 8 || width >= 32768) {

-		for (x = 0; x < width; x++) {

-			switch (ncomp) {

-			case 4: /* fallthrough */

-			case 3:

-				linear[2] = scanline[x * ncomp + 2];

-				linear[1] = scanline[x * ncomp + 1];

-				linear[0] = scanline[x * ncomp + 0];

-				break;

-			default:

-				linear[0] = linear[1] = linear[2] = scanline[x * ncomp + 0];

-				break;

-			}

-			stbiw__linear_to_rgbe(rgbe, linear);

-			s->func(s->context, rgbe, 4);

-		}

-	} else {

-		int c, r;

-		/* encode into scratch buffer */

-		for (x = 0; x < width; x++) {

-			switch (ncomp) {

-			case 4: /* fallthrough */

-			case 3:

-				linear[2] = scanline[x * ncomp + 2];

-				linear[1] = scanline[x * ncomp + 1];

-				linear[0] = scanline[x * ncomp + 0];

-				break;

-			default:

-				linear[0] = linear[1] = linear[2] = scanline[x * ncomp + 0];

-				break;

-			}

-			stbiw__linear_to_rgbe(rgbe, linear);

-			scratch[x + width * 0] = rgbe[0];

-			scratch[x + width * 1] = rgbe[1];

-			scratch[x + width * 2] = rgbe[2];

-			scratch[x + width * 3] = rgbe[3];

-		}

-		s->func(s->context, scanlineheader, 4);

-		/* RLE each component separately */

-		for (c = 0; c < 4; c++) {

-			unsigned char* comp = &scratch[width * c];

-			x = 0;

-			while (x < width) {

-				// find first run

-				r = x;

-				while (r + 2 < width) {

-					if (comp[r] == comp[r + 1] && comp[r] == comp[r + 2])

-						break;

-					++r;

-				}

-				if (r + 2 >= width)

-					r = width;

-				// dump up to first run

-				while (x < r) {

-					int len = r - x;

-					if (len > 128)

-						len = 128;

-					stbiw__write_dump_data(s, len, &comp[x]);

-					x += len;

-				}

-				// if there's a run, output it

-				if (r + 2 < width) { // same test as what we break out of in

-									 // search loop, so only true if we break'd

-					// find next byte after run

-					while (r < width && comp[r] == comp[x])

-						++r;

-					// output run up to r

-					while (x < r) {

-						int len = r - x;

-						if (len > 127)

-							len = 127;

-						stbiw__write_run_data(s, len, comp[x]);

-						x += len;

-					}

-				}

-			}

-		}

-	}

-}

-static int stbi_write_hdr_core(stbi__write_context* s, int x, int y, int comp, float* data) {

-	if (y <= 0 || x <= 0 || data == NULL)

-		return 0;

-	else {

-		// Each component is stored separately. Allocate scratch space for full

-		// output scanline.

-		unsigned char* scratch = (unsigned char*)STBIW_MALLOC(x * 4);

-		int i, len;

-		char buffer[128];

-		char header[] = "#?RADIANCE\n# Written by "

-						"stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n";

-		s->func(s->context, header, sizeof(header) - 1);

-#ifdef __STDC_WANT_SECURE_LIB__

-		len = sprintf_s(buffer, sizeof(buffer), "EXPOSURE=          1.0000000000000\n\n-Y %d +X %d\n", y, x);

-#else

-		len = sprintf(buffer, "EXPOSURE=          1.0000000000000\n\n-Y %d +X %d\n", y, x);

-#endif

-		s->func(s->context, buffer, len);

-		for (i = 0; i < y; i++)

-			stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp * x * (stbi__flip_vertically_on_write ? y - 1 - i : i));

-		STBIW_FREE(scratch);

-		return 1;

-	}

-}

-STBIWDEF int stbi_write_hdr_to_func(stbi_write_func* func, void* context, int x, int y, int comp, const float* data) {

-	stbi__write_context s;

-	stbi__start_write_callbacks(&s, func, context);

-	return stbi_write_hdr_core(&s, x, y, comp, (float*)data);

-}

-#ifndef STBI_WRITE_NO_STDIO

-STBIWDEF int stbi_write_hdr(char const* filename, int x, int y, int comp, const float* data) {

-	stbi__write_context s;

-	if (stbi__start_write_file(&s, filename)) {

-		int r = stbi_write_hdr_core(&s, x, y, comp, (float*)data);

-		stbi__end_write_file(&s);

-		return r;

-	} else

-		return 0;

-}

-#endif // STBI_WRITE_NO_STDIO

-//////////////////////////////////////////////////////////////////////////////

-//

-// PNG writer

-//

-#ifndef STBIW_ZLIB_COMPRESS

-// stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount()

-// == vector<>::size()

-#define stbiw__sbraw(a) ((int*)(void*)(a)-2)

-#define stbiw__sbm(a) stbiw__sbraw(a)[0]

-#define stbiw__sbn(a) stbiw__sbraw(a)[1]

-#define stbiw__sbneedgrow(a, n) ((a) == 0 || stbiw__sbn(a) + n >= stbiw__sbm(a))

-#define stbiw__sbmaybegrow(a, n) (stbiw__sbneedgrow(a, (n)) ? stbiw__sbgrow(a, n) : 0)

-#define stbiw__sbgrow(a, n) stbiw__sbgrowf((void**)&(a), (n), sizeof(*(a)))

-#define stbiw__sbpush(a, v) (stbiw__sbmaybegrow(a, 1), (a)[stbiw__sbn(a)++] = (v))

-#define stbiw__sbcount(a) ((a) ? stbiw__sbn(a) : 0)

-#define stbiw__sbfree(a) ((a) ? STBIW_FREE(stbiw__sbraw(a)), 0 : 0)

-static void* stbiw__sbgrowf(void** arr, int increment, int itemsize) {

-	int m = *arr ? 2 * stbiw__sbm(*arr) + increment : increment + 1;

-	void* p = STBIW_REALLOC_SIZED(*arr ? stbiw__sbraw(*arr) : 0, *arr ? (stbiw__sbm(*arr) * itemsize + sizeof(int) * 2) : 0, itemsize * m + sizeof(int) * 2);

-	STBIW_ASSERT(p);

-	if (p) {

-		if (!*arr)

-			((int*)p)[1] = 0;

-		*arr = (void*)((int*)p + 2);

-		stbiw__sbm(*arr) = m;

-	}

-	return *arr;

-}

-static unsigned char* stbiw__zlib_flushf(unsigned char* data, unsigned int* bitbuffer, int* bitcount) {

-	while (*bitcount >= 8) {

-		stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer));

-		*bitbuffer >>= 8;

-		*bitcount -= 8;

-	}

-	return data;

-}

-static int stbiw__zlib_bitrev(int code, int codebits) {

-	int res = 0;

-	while (codebits--) {

-		res = (res << 1) | (code & 1);

-		code >>= 1;

-	}

-	return res;

-}

-static unsigned int stbiw__zlib_countm(unsigned char* a, unsigned char* b, int limit) {

-	int i;

-	for (i = 0; i < limit && i < 258; ++i)

-		if (a[i] != b[i])

-			break;

-	return i;

-}

-static unsigned int stbiw__zhash(unsigned char* data) {

-	stbiw_uint32 hash = data[0] + (data[1] << 8) + (data[2] << 16);

-	hash ^= hash << 3;

-	hash += hash >> 5;

-	hash ^= hash << 4;

-	hash += hash >> 17;

-	hash ^= hash << 25;

-	hash += hash >> 6;

-	return hash;

-}

-#define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount))

-#define stbiw__zlib_add(code, codebits) (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush())

-#define stbiw__zlib_huffa(b, c) stbiw__zlib_add(stbiw__zlib_bitrev(b, c), c)

-// default huffman tables

-#define stbiw__zlib_huff1(n) stbiw__zlib_huffa(0x30 + (n), 8)

-#define stbiw__zlib_huff2(n) stbiw__zlib_huffa(0x190 + (n)-144, 9)

-#define stbiw__zlib_huff3(n) stbiw__zlib_huffa(0 + (n)-256, 7)

-#define stbiw__zlib_huff4(n) stbiw__zlib_huffa(0xc0 + (n)-280, 8)

-#define stbiw__zlib_huff(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n))

-#define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n))

-#define stbiw__ZHASH 16384

-#endif // STBIW_ZLIB_COMPRESS

-STBIWDEF unsigned char* stbi_zlib_compress(unsigned char* data, int data_len, int* out_len, int quality) {

-#ifdef STBIW_ZLIB_COMPRESS

-	// user provided a zlib compress implementation, use that

-	return STBIW_ZLIB_COMPRESS(data, data_len, out_len, quality);

-#else  // use builtin

-	static unsigned short lengthc[] = {3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 259};

-	static unsigned char lengtheb[] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0};

-	static unsigned short distc[] = {1,   2,   3,   4,   5,	7,	9,	13,   17,   25,   33,   49,	65,	97,	129,  193,

-									 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 32768};

-	static unsigned char disteb[] = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13};

-	unsigned int bitbuf = 0;

-	int i, j, bitcount = 0;

-	unsigned char* out = NULL;

-	unsigned char*** hash_table = (unsigned char***)STBIW_MALLOC(stbiw__ZHASH * sizeof(unsigned char**));

-	if (hash_table == NULL)

-		return NULL;

-	if (quality < 5)

-		quality = 5;

-	stbiw__sbpush(out, 0x78); // DEFLATE 32K window

-	stbiw__sbpush(out, 0x5e); // FLEVEL = 1

-	stbiw__zlib_add(1, 1);	// BFINAL = 1

-	stbiw__zlib_add(1, 2);	// BTYPE = 1 -- fixed huffman

-	for (i = 0; i < stbiw__ZHASH; ++i)

-		hash_table[i] = NULL;

-	i = 0;

-	while (i < data_len - 3) {

-		// hash next 3 bytes of data to be compressed

-		int h = stbiw__zhash(data + i) & (stbiw__ZHASH - 1), best = 3;

-		unsigned char* bestloc = 0;

-		unsigned char** hlist = hash_table[h];

-		int n = stbiw__sbcount(hlist);

-		for (j = 0; j < n; ++j) {

-			if (hlist[j] - data > i - 32768) { // if entry lies within window

-				int d = stbiw__zlib_countm(hlist[j], data + i, data_len - i);

-				if (d >= best) {

-					best = d;

-					bestloc = hlist[j];

-				}

-			}

-		}

-		// when hash table entry is too long, delete half the entries

-		if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2 * quality) {

-			STBIW_MEMMOVE(hash_table[h], hash_table[h] + quality, sizeof(hash_table[h][0]) * quality);

-			stbiw__sbn(hash_table[h]) = quality;

-		}

-		stbiw__sbpush(hash_table[h], data + i);

-		if (bestloc) {

-			// "lazy matching" - check match at *next* byte, and if it's better,

-			// do cur byte as literal

-			h = stbiw__zhash(data + i + 1) & (stbiw__ZHASH - 1);

-			hlist = hash_table[h];

-			n = stbiw__sbcount(hlist);

-			for (j = 0; j < n; ++j) {

-				if (hlist[j] - data > i - 32767) {

-					int e = stbiw__zlib_countm(hlist[j], data + i + 1, data_len - i - 1);

-					if (e > best) { // if next match is better, bail on current

-									// match

-						bestloc = NULL;

-						break;

-					}

-				}

-			}

-		}

-		if (bestloc) {

-			int d = (int)(data + i - bestloc); // distance back

-			STBIW_ASSERT(d <= 32767 && best <= 258);

-			for (j = 0; best > lengthc[j + 1] - 1; ++j)

-				;

-			stbiw__zlib_huff(j + 257);

-			if (lengtheb[j])

-				stbiw__zlib_add(best - lengthc[j], lengtheb[j]);

-			for (j = 0; d > distc[j + 1] - 1; ++j)

-				;

-			stbiw__zlib_add(stbiw__zlib_bitrev(j, 5), 5);

-			if (disteb[j])

-				stbiw__zlib_add(d - distc[j], disteb[j]);

-			i += best;

-		} else {

-			stbiw__zlib_huffb(data[i]);

-			++i;

-		}

-	}

-	// write out final bytes

-	for (; i < data_len; ++i)

-		stbiw__zlib_huffb(data[i]);

-	stbiw__zlib_huff(256); // end of block

-	// pad with 0 bits to byte boundary

-	while (bitcount)

-		stbiw__zlib_add(0, 1);

-	for (i = 0; i < stbiw__ZHASH; ++i)

-		(void)stbiw__sbfree(hash_table[i]);

-	STBIW_FREE(hash_table);

-	{

-		// compute adler32 on input

-		unsigned int s1 = 1, s2 = 0;

-		int blocklen = (int)(data_len % 5552);

-		j = 0;

-		while (j < data_len) {

-			for (i = 0; i < blocklen; ++i) {

-				s1 += data[j + i];

-				s2 += s1;

-			}

-			s1 %= 65521;

-			s2 %= 65521;

-			j += blocklen;

-			blocklen = 5552;

-		}

-		stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8));

-		stbiw__sbpush(out, STBIW_UCHAR(s2));

-		stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8));

-		stbiw__sbpush(out, STBIW_UCHAR(s1));

-	}

-	*out_len = stbiw__sbn(out);

-	// make returned pointer freeable

-	STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len);

-	return (unsigned char*)stbiw__sbraw(out);

-#endif // STBIW_ZLIB_COMPRESS

-}

-static unsigned int stbiw__crc32(unsigned char* buffer, int len) {

-#ifdef STBIW_CRC32

-	return STBIW_CRC32(buffer, len);

-#else

-	static unsigned int crc_table[256] = {

-		0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, 0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988,

-		0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,

-		0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172,

-		0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,

-		0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924,

-		0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,

-		0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E,

-		0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,

-		0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0,

-		0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,

-		0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A,

-		0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,

-		0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC,

-		0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,

-		0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236,

-		0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,

-		0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38,

-		0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,

-		0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2,

-		0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,

-		0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94,

-		0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D};

-	unsigned int crc = ~0u;

-	int i;

-	for (i = 0; i < len; ++i)

-		crc = (crc >> 8) ^ crc_table[buffer[i] ^ (crc & 0xff)];

-	return ~crc;

-#endif

-}

-#define stbiw__wpng4(o, a, b, c, d) ((o)[0] = STBIW_UCHAR(a), (o)[1] = STBIW_UCHAR(b), (o)[2] = STBIW_UCHAR(c), (o)[3] = STBIW_UCHAR(d), (o) += 4)

-#define stbiw__wp32(data, v) stbiw__wpng4(data, (v) >> 24, (v) >> 16, (v) >> 8, (v));

-#define stbiw__wptag(data, s) stbiw__wpng4(data, s[0], s[1], s[2], s[3])

-static void stbiw__wpcrc(unsigned char** data, int len) {

-	unsigned int crc = stbiw__crc32(*data - len - 4, len + 4);

-	stbiw__wp32(*data, crc);

-}

-static unsigned char stbiw__paeth(int a, int b, int c) {

-	int p = a + b - c, pa = abs(p - a), pb = abs(p - b), pc = abs(p - c);

-	if (pa <= pb && pa <= pc)

-		return STBIW_UCHAR(a);

-	if (pb <= pc)

-		return STBIW_UCHAR(b);

-	return STBIW_UCHAR(c);

-}

-// @OPTIMIZE: provide an option that always forces left-predict or paeth predict

-static void stbiw__encode_png_line(unsigned char* pixels, int stride_bytes, int width, int height, int y, int n, int filter_type, signed char* line_buffer) {

-	static int mapping[] = {0, 1, 2, 3, 4};

-	static int firstmap[] = {0, 1, 0, 5, 6};

-	int* mymap = (y != 0) ? mapping : firstmap;

-	int i;

-	int type = mymap[filter_type];

-	unsigned char* z = pixels + stride_bytes * (stbi__flip_vertically_on_write ? height - 1 - y : y);

-	int signed_stride = stbi__flip_vertically_on_write ? -stride_bytes : stride_bytes;

-	if (type == 0) {

-		memcpy(line_buffer, z, width * n);

-		return;

-	}

-	// first loop isn't optimized since it's just one pixel

-	for (i = 0; i < n; ++i) {

-		switch (type) {

-		case 1:

-			line_buffer[i] = z[i];

-			break;

-		case 2:

-			line_buffer[i] = z[i] - z[i - signed_stride];

-			break;

-		case 3:

-			line_buffer[i] = z[i] - (z[i - signed_stride] >> 1);

-			break;

-		case 4:

-			line_buffer[i] = (signed char)(z[i] - stbiw__paeth(0, z[i - signed_stride], 0));

-			break;

-		case 5:

-			line_buffer[i] = z[i];

-			break;

-		case 6:

-			line_buffer[i] = z[i];

-			break;

-		}

-	}

-	switch (type) {

-	case 1:

-		for (i = n; i < width * n; ++i)

-			line_buffer[i] = z[i] - z[i - n];

-		break;

-	case 2:

-		for (i = n; i < width * n; ++i)

-			line_buffer[i] = z[i] - z[i - signed_stride];

-		break;

-	case 3:

-		for (i = n; i < width * n; ++i)

-			line_buffer[i] = z[i] - ((z[i - n] + z[i - signed_stride]) >> 1);

-		break;

-	case 4:

-		for (i = n; i < width * n; ++i)

-			line_buffer[i] = z[i] - stbiw__paeth(z[i - n], z[i - signed_stride], z[i - signed_stride - n]);

-		break;

-	case 5:

-		for (i = n; i < width * n; ++i)

-			line_buffer[i] = z[i] - (z[i - n] >> 1);

-		break;

-	case 6:

-		for (i = n; i < width * n; ++i)

-			line_buffer[i] = z[i] - stbiw__paeth(z[i - n], 0, 0);

-		break;

-	}

-}

-STBIWDEF unsigned char* stbi_write_png_to_mem(const unsigned char* pixels, int stride_bytes, int x, int y, int n, int* out_len) {

-	int force_filter = stbi_write_force_png_filter;

-	int ctype[5] = {-1, 0, 4, 2, 6};

-	unsigned char sig[8] = {137, 80, 78, 71, 13, 10, 26, 10};

-	unsigned char *out, *o, *filt, *zlib;

-	signed char* line_buffer;

-	int j, zlen;

-	if (stride_bytes == 0)

-		stride_bytes = x * n;

-	if (force_filter >= 5) {

-		force_filter = -1;

-	}

-	filt = (unsigned char*)STBIW_MALLOC((x * n + 1) * y);

-	if (!filt)

-		return 0;

-	line_buffer = (signed char*)STBIW_MALLOC(x * n);

-	if (!line_buffer) {

-		STBIW_FREE(filt);

-		return 0;

-	}

-	for (j = 0; j < y; ++j) {

-		int filter_type;

-		if (force_filter > -1) {

-			filter_type = force_filter;

-			stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, force_filter, line_buffer);

-		} else { // Estimate the best filter by running through all of them:

-			int best_filter = 0, best_filter_val = 0x7fffffff, est, i;

-			for (filter_type = 0; filter_type < 5; filter_type++) {

-				stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, filter_type, line_buffer);

-				// Estimate the entropy of the line using this filter; the less,

-				// the better.

-				est = 0;

-				for (i = 0; i < x * n; ++i) {

-					est += abs((signed char)line_buffer[i]);

-				}

-				if (est < best_filter_val) {

-					best_filter_val = est;

-					best_filter = filter_type;

-				}

-			}

-			if (filter_type != best_filter) { // If the last iteration already got us

-											  // the best filter, don't redo it

-				stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, best_filter, line_buffer);

-				filter_type = best_filter;

-			}

-		}

-		// when we get here, filter_type contains the filter type, and

-		// line_buffer contains the data

-		filt[j * (x * n + 1)] = (unsigned char)filter_type;

-		STBIW_MEMMOVE(filt + j * (x * n + 1) + 1, line_buffer, x * n);

-	}

-	STBIW_FREE(line_buffer);

-	zlib = stbi_zlib_compress(filt, y * (x * n + 1), &zlen, stbi_write_png_compression_level);

-	STBIW_FREE(filt);

-	if (!zlib)

-		return 0;

-	// each tag requires 12 bytes of overhead

-	out = (unsigned char*)STBIW_MALLOC(8 + 12 + 13 + 12 + zlen + 12);

-	if (!out)

-		return 0;

-	*out_len = 8 + 12 + 13 + 12 + zlen + 12;

-	o = out;

-	STBIW_MEMMOVE(o, sig, 8);

-	o += 8;

-	stbiw__wp32(o, 13); // header length

-	stbiw__wptag(o, "IHDR");

-	stbiw__wp32(o, x);

-	stbiw__wp32(o, y);

-	*o++ = 8;

-	*o++ = STBIW_UCHAR(ctype[n]);

-	*o++ = 0;

-	*o++ = 0;

-	*o++ = 0;

-	stbiw__wpcrc(&o, 13);

-	stbiw__wp32(o, zlen);

-	stbiw__wptag(o, "IDAT");

-	STBIW_MEMMOVE(o, zlib, zlen);

-	o += zlen;

-	STBIW_FREE(zlib);

-	stbiw__wpcrc(&o, zlen);

-	stbiw__wp32(o, 0);

-	stbiw__wptag(o, "IEND");

-	stbiw__wpcrc(&o, 0);

-	STBIW_ASSERT(o == out + *out_len);

-	return out;

-}

-#ifndef STBI_WRITE_NO_STDIO

-STBIWDEF int stbi_write_png(char const* filename, int x, int y, int comp, const void* data, int stride_bytes) {

-	FILE* f;

-	int len;

-	unsigned char* png = stbi_write_png_to_mem((const unsigned char*)data, stride_bytes, x, y, comp, &len);

-	if (png == NULL)

-		return 0;

-	f = stbiw__fopen(filename, "wb");

-	if (!f) {

-		STBIW_FREE(png);

-		return 0;

-	}

-	fwrite(png, 1, len, f);

-	fclose(f);

-	STBIW_FREE(png);

-	return 1;

-}

-#endif

-STBIWDEF int stbi_write_png_to_func(stbi_write_func* func, void* context, int x, int y, int comp, const void* data, int stride_bytes) {

-	int len;

-	unsigned char* png = stbi_write_png_to_mem((const unsigned char*)data, stride_bytes, x, y, comp, &len);

-	if (png == NULL)

-		return 0;

-	func(context, png, len);

-	STBIW_FREE(png);

-	return 1;

-}

-/* ***************************************************************************

- *

- * JPEG writer

- *

- * This is based on Jon Olick's jo_jpeg.cpp:

- * public domain Simple, Minimalistic JPEG writer -

- * http://www.jonolick.com/code.html

- */

-static const unsigned char stbiw__jpg_ZigZag[] = {0,  1,  5,  6,  14, 15, 27, 28, 2,  4,  7,  13, 16, 26, 29, 42, 3,  8,  12, 17, 25, 30,

-												  41, 43, 9,  11, 18, 24, 31, 40, 44, 53, 10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38,

-												  46, 51, 55, 60, 21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63};

-static void stbiw__jpg_writeBits(stbi__write_context* s, int* bitBufP, int* bitCntP, const unsigned short* bs) {

-	int bitBuf = *bitBufP, bitCnt = *bitCntP;

-	bitCnt += bs[1];

-	bitBuf |= bs[0] << (24 - bitCnt);

-	while (bitCnt >= 8) {

-		unsigned char c = (bitBuf >> 16) & 255;

-		stbiw__putc(s, c);

-		if (c == 255) {

-			stbiw__putc(s, 0);

-		}

-		bitBuf <<= 8;

-		bitCnt -= 8;

-	}

-	*bitBufP = bitBuf;

-	*bitCntP = bitCnt;

-}

-static void stbiw__jpg_DCT(float* d0p, float* d1p, float* d2p, float* d3p, float* d4p, float* d5p, float* d6p, float* d7p) {

-	float d0 = *d0p, d1 = *d1p, d2 = *d2p, d3 = *d3p, d4 = *d4p, d5 = *d5p, d6 = *d6p, d7 = *d7p;

-	float z1, z2, z3, z4, z5, z11, z13;

-	float tmp0 = d0 + d7;

-	float tmp7 = d0 - d7;

-	float tmp1 = d1 + d6;

-	float tmp6 = d1 - d6;

-	float tmp2 = d2 + d5;

-	float tmp5 = d2 - d5;

-	float tmp3 = d3 + d4;

-	float tmp4 = d3 - d4;

-	// Even part

-	float tmp10 = tmp0 + tmp3; // phase 2

-	float tmp13 = tmp0 - tmp3;

-	float tmp11 = tmp1 + tmp2;

-	float tmp12 = tmp1 - tmp2;

-	d0 = tmp10 + tmp11; // phase 3

-	d4 = tmp10 - tmp11;

-	z1 = (tmp12 + tmp13) * 0.707106781f; // c4

-	d2 = tmp13 + z1;					 // phase 5

-	d6 = tmp13 - z1;

-	// Odd part

-	tmp10 = tmp4 + tmp5; // phase 2

-	tmp11 = tmp5 + tmp6;

-	tmp12 = tmp6 + tmp7;

-	// The rotator is modified from fig 4-8 to avoid extra negations.

-	z5 = (tmp10 - tmp12) * 0.382683433f; // c6

-	z2 = tmp10 * 0.541196100f + z5;		 // c2-c6

-	z4 = tmp12 * 1.306562965f + z5;		 // c2+c6

-	z3 = tmp11 * 0.707106781f;			 // c4

-	z11 = tmp7 + z3; // phase 5

-	z13 = tmp7 - z3;

-	*d5p = z13 + z2; // phase 6

-	*d3p = z13 - z2;

-	*d1p = z11 + z4;

-	*d7p = z11 - z4;

-	*d0p = d0;

-	*d2p = d2;

-	*d4p = d4;

-	*d6p = d6;

-}

-static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) {

-	int tmp1 = val < 0 ? -val : val;

-	val = val < 0 ? val - 1 : val;

-	bits[1] = 1;

-	while (tmp1 >>= 1) {

-		++bits[1];

-	}

-	bits[0] = val & ((1 << bits[1]) - 1);

-}

-static int stbiw__jpg_processDU(stbi__write_context* s, int* bitBuf, int* bitCnt, float* CDU, int du_stride, float* fdtbl, int DC,

-								const unsigned short HTDC[256][2], const unsigned short HTAC[256][2]) {

-	const unsigned short EOB[2] = {HTAC[0x00][0], HTAC[0x00][1]};

-	const unsigned short M16zeroes[2] = {HTAC[0xF0][0], HTAC[0xF0][1]};

-	int dataOff, i, j, n, diff, end0pos, x, y;

-	int DU[64];

-	// DCT rows

-	for (dataOff = 0, n = du_stride * 8; dataOff < n; dataOff += du_stride) {

-		stbiw__jpg_DCT(&CDU[dataOff], &CDU[dataOff + 1], &CDU[dataOff + 2], &CDU[dataOff + 3], &CDU[dataOff + 4], &CDU[dataOff + 5], &CDU[dataOff + 6],

-					   &CDU[dataOff + 7]);

-	}

-	// DCT columns

-	for (dataOff = 0; dataOff < 8; ++dataOff) {

-		stbiw__jpg_DCT(&CDU[dataOff], &CDU[dataOff + du_stride], &CDU[dataOff + du_stride * 2], &CDU[dataOff + du_stride * 3], &CDU[dataOff + du_stride * 4],

-					   &CDU[dataOff + du_stride * 5], &CDU[dataOff + du_stride * 6], &CDU[dataOff + du_stride * 7]);

-	}

-	// Quantize/descale/zigzag the coefficients

-	for (y = 0, j = 0; y < 8; ++y) {

-		for (x = 0; x < 8; ++x, ++j) {

-			float v;

-			i = y * du_stride + x;

-			v = CDU[i] * fdtbl[j];

-			// DU[stbiw__jpg_ZigZag[j]] = (int)(v < 0 ? ceilf(v - 0.5f) :

-			// floorf(v + 0.5f)); ceilf() and floorf() are C99, not C89, but I

-			// /think/ they're not needed here anyway?

-			DU[stbiw__jpg_ZigZag[j]] = (int)(v < 0 ? v - 0.5f : v + 0.5f);

-		}

-	}

-	// Encode DC

-	diff = DU[0] - DC;

-	if (diff == 0) {

-		stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTDC[0]);

-	} else {

-		unsigned short bits[2];

-		stbiw__jpg_calcBits(diff, bits);

-		stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTDC[bits[1]]);

-		stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits);

-	}

-	// Encode ACs

-	end0pos = 63;

-	for (; (end0pos > 0) && (DU[end0pos] == 0); --end0pos) {

-	}

-	// end0pos = first element in reverse order !=0

-	if (end0pos == 0) {

-		stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB);

-		return DU[0];

-	}

-	for (i = 1; i <= end0pos; ++i) {

-		int startpos = i;

-		int nrzeroes;

-		unsigned short bits[2];

-		for (; DU[i] == 0 && i <= end0pos; ++i) {

-		}

-		nrzeroes = i - startpos;

-		if (nrzeroes >= 16) {

-			int lng = nrzeroes >> 4;

-			int nrmarker;

-			for (nrmarker = 1; nrmarker <= lng; ++nrmarker)

-				stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes);

-			nrzeroes &= 15;

-		}

-		stbiw__jpg_calcBits(DU[i], bits);

-		stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes << 4) + bits[1]]);

-		stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits);

-	}

-	if (end0pos != 63) {

-		stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB);

-	}

-	return DU[0];

-}

-static int stbi_write_jpg_core(stbi__write_context* s, int width, int height, int comp, const void* data, int quality) {

-	// Constants that don't pollute global namespace

-	static const unsigned char std_dc_luminance_nrcodes[] = {0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0};

-	static const unsigned char std_dc_luminance_values[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};

-	static const unsigned char std_ac_luminance_nrcodes[] = {0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d};

-	static const unsigned char std_ac_luminance_values[] = {

-		0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,

-		0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0, 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,

-		0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,

-		0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,

-		0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,

-		0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,

-		0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa};

-	static const unsigned char std_dc_chrominance_nrcodes[] = {0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0};

-	static const unsigned char std_dc_chrominance_values[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};

-	static const unsigned char std_ac_chrominance_nrcodes[] = {0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77};

-	static const unsigned char std_ac_chrominance_values[] = {

-		0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71, 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,

-		0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, 0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34, 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,

-		0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,

-		0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,

-		0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,

-		0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,

-		0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa};

-	// Huffman tables

-	static const unsigned short YDC_HT[256][2] = {{0, 2}, {2, 3}, {3, 3}, {4, 3}, {5, 3}, {6, 3}, {14, 4}, {30, 5}, {62, 6}, {126, 7}, {254, 8}, {510, 9}};

-	static const unsigned short UVDC_HT[256][2] = {{0, 2},  {1, 2},   {2, 2},   {6, 3},   {14, 4},	{30, 5},

-												   {62, 6}, {126, 7}, {254, 8}, {510, 9}, {1022, 10}, {2046, 11}};

-	static const unsigned short YAC_HT[256][2] = {

-		{10, 4},	 {0, 2},	  {1, 2},	  {4, 3},		{11, 4},	 {26, 5},	 {120, 7},	{248, 8},	{1014, 10},  {65410, 16}, {65411, 16},

-		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {12, 4},	 {27, 5},		{121, 7},	{502, 9},	{2038, 11},

-		{65412, 16}, {65413, 16}, {65414, 16}, {65415, 16}, {65416, 16}, {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},

-		{28, 5},	 {249, 8},	{1015, 10},  {4084, 12},  {65417, 16}, {65418, 16}, {65419, 16}, {65420, 16}, {65421, 16}, {65422, 16}, {0, 0},

-		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {58, 6},	 {503, 9},	{4085, 12},  {65423, 16}, {65424, 16}, {65425, 16},

-		{65426, 16}, {65427, 16}, {65428, 16}, {65429, 16}, {0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {59, 6},

-		{1016, 10},  {65430, 16}, {65431, 16}, {65432, 16}, {65433, 16}, {65434, 16}, {65435, 16}, {65436, 16}, {65437, 16}, {0, 0},	  {0, 0},

-		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{122, 7},	{2039, 11},  {65438, 16}, {65439, 16}, {65440, 16}, {65441, 16}, {65442, 16},

-		{65443, 16}, {65444, 16}, {65445, 16}, {0, 0},		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {123, 7},	{4086, 12},

-		{65446, 16}, {65447, 16}, {65448, 16}, {65449, 16}, {65450, 16}, {65451, 16}, {65452, 16}, {65453, 16}, {0, 0},		 {0, 0},	  {0, 0},

-		{0, 0},		 {0, 0},	  {0, 0},	  {250, 8},	{4087, 12},  {65454, 16}, {65455, 16}, {65456, 16}, {65457, 16}, {65458, 16}, {65459, 16},

-		{65460, 16}, {65461, 16}, {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{504, 9},	{32704, 15}, {65462, 16},

-		{65463, 16}, {65464, 16}, {65465, 16}, {65466, 16}, {65467, 16}, {65468, 16}, {65469, 16}, {0, 0},		{0, 0},		 {0, 0},	  {0, 0},

-		{0, 0},		 {0, 0},	  {505, 9},	{65470, 16}, {65471, 16}, {65472, 16}, {65473, 16}, {65474, 16}, {65475, 16}, {65476, 16}, {65477, 16},

-		{65478, 16}, {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},	  {506, 9},	{65479, 16}, {65480, 16}, {65481, 16},

-		{65482, 16}, {65483, 16}, {65484, 16}, {65485, 16}, {65486, 16}, {65487, 16}, {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},

-		{0, 0},		 {1017, 10},  {65488, 16}, {65489, 16}, {65490, 16}, {65491, 16}, {65492, 16}, {65493, 16}, {65494, 16}, {65495, 16}, {65496, 16},

-		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {1018, 10},  {65497, 16}, {65498, 16}, {65499, 16}, {65500, 16},

-		{65501, 16}, {65502, 16}, {65503, 16}, {65504, 16}, {65505, 16}, {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},

-		{2040, 11},  {65506, 16}, {65507, 16}, {65508, 16}, {65509, 16}, {65510, 16}, {65511, 16}, {65512, 16}, {65513, 16}, {65514, 16}, {0, 0},

-		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {65515, 16}, {65516, 16}, {65517, 16}, {65518, 16}, {65519, 16}, {65520, 16},

-		{65521, 16}, {65522, 16}, {65523, 16}, {65524, 16}, {0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {2041, 11},  {65525, 16},

-		{65526, 16}, {65527, 16}, {65528, 16}, {65529, 16}, {65530, 16}, {65531, 16}, {65532, 16}, {65533, 16}, {65534, 16}, {0, 0},	  {0, 0},

-		{0, 0},		 {0, 0},	  {0, 0}};

-	static const unsigned short UVAC_HT[256][2] = {

-		{0, 2},		 {1, 2},	  {4, 3},	  {10, 4},		{24, 5},	 {25, 5},	 {56, 6},	 {120, 7},	{500, 9},	{1014, 10},  {4084, 12},

-		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {11, 4},	 {57, 6},		{246, 8},	{501, 9},	{2038, 11},

-		{4085, 12},  {65416, 16}, {65417, 16}, {65418, 16}, {65419, 16}, {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},

-		{26, 5},	 {247, 8},	{1015, 10},  {4086, 12},  {32706, 15}, {65420, 16}, {65421, 16}, {65422, 16}, {65423, 16}, {65424, 16}, {0, 0},

-		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {27, 5},	 {248, 8},	{1016, 10},  {4087, 12},  {65425, 16}, {65426, 16},

-		{65427, 16}, {65428, 16}, {65429, 16}, {65430, 16}, {0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {58, 6},

-		{502, 9},	{65431, 16}, {65432, 16}, {65433, 16}, {65434, 16}, {65435, 16}, {65436, 16}, {65437, 16}, {65438, 16}, {0, 0},	  {0, 0},

-		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{59, 6},	 {1017, 10},  {65439, 16}, {65440, 16}, {65441, 16}, {65442, 16}, {65443, 16},

-		{65444, 16}, {65445, 16}, {65446, 16}, {0, 0},		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {121, 7},	{2039, 11},

-		{65447, 16}, {65448, 16}, {65449, 16}, {65450, 16}, {65451, 16}, {65452, 16}, {65453, 16}, {65454, 16}, {0, 0},		 {0, 0},	  {0, 0},

-		{0, 0},		 {0, 0},	  {0, 0},	  {122, 7},	{2040, 11},  {65455, 16}, {65456, 16}, {65457, 16}, {65458, 16}, {65459, 16}, {65460, 16},

-		{65461, 16}, {65462, 16}, {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{249, 8},	{65463, 16}, {65464, 16},

-		{65465, 16}, {65466, 16}, {65467, 16}, {65468, 16}, {65469, 16}, {65470, 16}, {65471, 16}, {0, 0},		{0, 0},		 {0, 0},	  {0, 0},

-		{0, 0},		 {0, 0},	  {503, 9},	{65472, 16}, {65473, 16}, {65474, 16}, {65475, 16}, {65476, 16}, {65477, 16}, {65478, 16}, {65479, 16},

-		{65480, 16}, {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},	  {504, 9},	{65481, 16}, {65482, 16}, {65483, 16},

-		{65484, 16}, {65485, 16}, {65486, 16}, {65487, 16}, {65488, 16}, {65489, 16}, {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},

-		{0, 0},		 {505, 9},	{65490, 16}, {65491, 16}, {65492, 16}, {65493, 16}, {65494, 16}, {65495, 16}, {65496, 16}, {65497, 16}, {65498, 16},

-		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {506, 9},	{65499, 16}, {65500, 16}, {65501, 16}, {65502, 16},

-		{65503, 16}, {65504, 16}, {65505, 16}, {65506, 16}, {65507, 16}, {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},

-		{2041, 11},  {65508, 16}, {65509, 16}, {65510, 16}, {65511, 16}, {65512, 16}, {65513, 16}, {65514, 16}, {65515, 16}, {65516, 16}, {0, 0},

-		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {16352, 14}, {65517, 16}, {65518, 16}, {65519, 16}, {65520, 16}, {65521, 16},

-		{65522, 16}, {65523, 16}, {65524, 16}, {65525, 16}, {0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {1018, 10},  {32707, 15},

-		{65526, 16}, {65527, 16}, {65528, 16}, {65529, 16}, {65530, 16}, {65531, 16}, {65532, 16}, {65533, 16}, {65534, 16}, {0, 0},	  {0, 0},

-		{0, 0},		 {0, 0},	  {0, 0}};

-	static const int YQT[] = {16, 11,  10,  16, 24, 40, 51, 61, 12,  12,  14,  19,  26, 58, 60, 55,  14,  13,  16,  24, 40, 57,

-							  69, 56,  14,  17, 22, 29, 51, 87, 80,  62,  18,  22,  37, 56, 68, 109, 103, 77,  24,  35, 55, 64,

-							  81, 104, 113, 92, 49, 64, 78, 87, 103, 121, 120, 101, 72, 92, 95, 98,  112, 100, 103, 99};

-	static const int UVQT[] = {17, 18, 24, 47, 99, 99, 99, 99, 18, 21, 26, 66, 99, 99, 99, 99, 24, 26, 56, 99, 99, 99, 99, 99, 47, 66, 99, 99, 99, 99, 99, 99,

-							   99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99};

-	static const float aasf[] = {1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f,

-								 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f};

-	int row, col, i, k, subsample;

-	float fdtbl_Y[64], fdtbl_UV[64];

-	unsigned char YTable[64], UVTable[64];

-	if (!data || !width || !height || comp > 4 || comp < 1) {

-		return 0;

-	}

-	quality = quality ? quality : 90;

-	subsample = quality <= 90 ? 1 : 0;

-	quality = quality < 1 ? 1 : quality > 100 ? 100 : quality;

-	quality = quality < 50 ? 5000 / quality : 200 - quality * 2;

-	for (i = 0; i < 64; ++i) {

-		int uvti, yti = (YQT[i] * quality + 50) / 100;

-		YTable[stbiw__jpg_ZigZag[i]] = (unsigned char)(yti < 1 ? 1 : yti > 255 ? 255 : yti);

-		uvti = (UVQT[i] * quality + 50) / 100;

-		UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char)(uvti < 1 ? 1 : uvti > 255 ? 255 : uvti);

-	}

-	for (row = 0, k = 0; row < 8; ++row) {

-		for (col = 0; col < 8; ++col, ++k) {

-			fdtbl_Y[k] = 1 / (YTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]);

-			fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]);

-		}

-	}

-	// Write Headers

-	{

-		static const unsigned char head0[] = {0xFF, 0xD8, 0xFF, 0xE0, 0, 0x10, 'J', 'F', 'I', 'F', 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0xFF, 0xDB, 0, 0x84, 0};

-		static const unsigned char head2[] = {0xFF, 0xDA, 0, 0xC, 3, 1, 0, 2, 0x11, 3, 0x11, 0, 0x3F, 0};

-		const unsigned char head1[] = {0xFF,

-									   0xC0,

-									   0,

-									   0x11,

-									   8,

-									   (unsigned char)(height >> 8),

-									   STBIW_UCHAR(height),

-									   (unsigned char)(width >> 8),

-									   STBIW_UCHAR(width),

-									   3,

-									   1,

-									   (unsigned char)(subsample ? 0x22 : 0x11),

-									   0,

-									   2,

-									   0x11,

-									   1,

-									   3,

-									   0x11,

-									   1,

-									   0xFF,

-									   0xC4,

-									   0x01,

-									   0xA2,

-									   0};

-		s->func(s->context, (void*)head0, sizeof(head0));

-		s->func(s->context, (void*)YTable, sizeof(YTable));

-		stbiw__putc(s, 1);

-		s->func(s->context, UVTable, sizeof(UVTable));

-		s->func(s->context, (void*)head1, sizeof(head1));

-		s->func(s->context, (void*)(std_dc_luminance_nrcodes + 1), sizeof(std_dc_luminance_nrcodes) - 1);

-		s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values));

-		stbiw__putc(s, 0x10); // HTYACinfo

-		s->func(s->context, (void*)(std_ac_luminance_nrcodes + 1), sizeof(std_ac_luminance_nrcodes) - 1);

-		s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values));

-		stbiw__putc(s, 1); // HTUDCinfo

-		s->func(s->context, (void*)(std_dc_chrominance_nrcodes + 1), sizeof(std_dc_chrominance_nrcodes) - 1);

-		s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values));

-		stbiw__putc(s, 0x11); // HTUACinfo

-		s->func(s->context, (void*)(std_ac_chrominance_nrcodes + 1), sizeof(std_ac_chrominance_nrcodes) - 1);

-		s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values));

-		s->func(s->context, (void*)head2, sizeof(head2));

-	}

-	// Encode 8x8 macroblocks

-	{

-		static const unsigned short fillBits[] = {0x7F, 7};

-		int DCY = 0, DCU = 0, DCV = 0;

-		int bitBuf = 0, bitCnt = 0;

-		// comp == 2 is grey+alpha (alpha is ignored)

-		int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0;

-		const unsigned char* dataR = (const unsigned char*)data;

-		const unsigned char* dataG = dataR + ofsG;

-		const unsigned char* dataB = dataR + ofsB;

-		int x, y, pos;

-		if (subsample) {

-			for (y = 0; y < height; y += 16) {

-				for (x = 0; x < width; x += 16) {

-					float Y[256], U[256], V[256];

-					for (row = y, pos = 0; row < y + 16; ++row) {

-						// row >= height => use last input row

-						int clamped_row = (row < height) ? row : height - 1;

-						int base_p = (stbi__flip_vertically_on_write ? (height - 1 - clamped_row) : clamped_row) * width * comp;

-						for (col = x; col < x + 16; ++col, ++pos) {

-							// if col >= width => use pixel from last input

-							// column

-							int p = base_p + ((col < width) ? col : (width - 1)) * comp;

-							float r = dataR[p], g = dataG[p], b = dataB[p];

-							Y[pos] = +0.29900f * r + 0.58700f * g + 0.11400f * b - 128;

-							U[pos] = -0.16874f * r - 0.33126f * g + 0.50000f * b;

-							V[pos] = +0.50000f * r - 0.41869f * g - 0.08131f * b;

-						}

-					}

-					DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y + 0, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT);

-					DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y + 8, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT);

-					DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y + 128, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT);

-					DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y + 136, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT);

-					// subsample U,V

-					{

-						float subU[64], subV[64];

-						int yy, xx;

-						for (yy = 0, pos = 0; yy < 8; ++yy) {

-							for (xx = 0; xx < 8; ++xx, ++pos) {

-								int j = yy * 32 + xx * 2;

-								subU[pos] = (U[j + 0] + U[j + 1] + U[j + 16] + U[j + 17]) * 0.25f;

-								subV[pos] = (V[j + 0] + V[j + 1] + V[j + 16] + V[j + 17]) * 0.25f;

-							}

-						}

-						DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, subU, 8, fdtbl_UV, DCU, UVDC_HT, UVAC_HT);

-						DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, subV, 8, fdtbl_UV, DCV, UVDC_HT, UVAC_HT);

-					}

-				}

-			}

-		} else {

-			for (y = 0; y < height; y += 8) {

-				for (x = 0; x < width; x += 8) {

-					float Y[64], U[64], V[64];

-					for (row = y, pos = 0; row < y + 8; ++row) {

-						// row >= height => use last input row

-						int clamped_row = (row < height) ? row : height - 1;

-						int base_p = (stbi__flip_vertically_on_write ? (height - 1 - clamped_row) : clamped_row) * width * comp;

-						for (col = x; col < x + 8; ++col, ++pos) {

-							// if col >= width => use pixel from last input

-							// column

-							int p = base_p + ((col < width) ? col : (width - 1)) * comp;

-							float r = dataR[p], g = dataG[p], b = dataB[p];

-							Y[pos] = +0.29900f * r + 0.58700f * g + 0.11400f * b - 128;

-							U[pos] = -0.16874f * r - 0.33126f * g + 0.50000f * b;

-							V[pos] = +0.50000f * r - 0.41869f * g - 0.08131f * b;

-						}

-					}

-					DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y, 8, fdtbl_Y, DCY, YDC_HT, YAC_HT);

-					DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, U, 8, fdtbl_UV, DCU, UVDC_HT, UVAC_HT);

-					DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, V, 8, fdtbl_UV, DCV, UVDC_HT, UVAC_HT);

-				}

-			}

-		}

-		// Do the bit alignment of the EOI marker

-		stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits);

-	}

-	// EOI

-	stbiw__putc(s, 0xFF);

-	stbiw__putc(s, 0xD9);

-	return 1;

-}

-STBIWDEF int stbi_write_jpg_to_func(stbi_write_func* func, void* context, int x, int y, int comp, const void* data, int quality) {

-	stbi__write_context s;

-	stbi__start_write_callbacks(&s, func, context);

-	return stbi_write_jpg_core(&s, x, y, comp, (void*)data, quality);

-}

-#ifndef STBI_WRITE_NO_STDIO

-STBIWDEF int stbi_write_jpg(char const* filename, int x, int y, int comp, const void* data, int quality) {

-	stbi__write_context s;

-	if (stbi__start_write_file(&s, filename)) {

-		int r = stbi_write_jpg_core(&s, x, y, comp, data, quality);

-		stbi__end_write_file(&s);

-		return r;

-	} else

-		return 0;

-}

-#endif

-#endif // STB_IMAGE_WRITE_IMPLEMENTATION

-/* Revision history

-	  1.14  (2020-02-02) updated JPEG writer to downsample chroma channels

-	  1.13

-	  1.12

-	  1.11  (2019-08-11)

-	  1.10  (2019-02-07)

-			 support utf8 filenames in Windows; fix warnings and platform ifdefs

-	  1.09  (2018-02-11)

-			 fix typo in zlib quality API, improve STB_I_W_STATIC in C++

-	  1.08  (2018-01-29)

-			 add stbi__flip_vertically_on_write, external zlib, zlib quality,

-   choose PNG filter 1.07  (2017-07-24) doc fix 1.06 (2017-07-23) writing JPEG

-   (using Jon Olick's code) 1.05   ??? 1.04 (2017-03-03) monochrome BMP

-   expansion 1.03   ??? 1.02 (2016-04-02) avoid allocating large structures on

-   the stack 1.01 (2016-01-16) STBIW_REALLOC_SIZED: support allocators with no

-   realloc support avoid race-condition in crc initialization minor compile

-   issues 1.00 (2015-09-14) installable file IO function 0.99 (2015-09-13)

-			 warning fixes; TGA rle support

-	  0.98 (2015-04-08)

-			 added STBIW_MALLOC, STBIW_ASSERT etc

-	  0.97 (2015-01-18)

-			 fixed HDR asserts, rewrote HDR rle logic

-	  0.96 (2015-01-17)

-			 add HDR output

-			 fix monochrome BMP

-	  0.95 (2014-08-17)

-					   add monochrome TGA output

-	  0.94 (2014-05-31)

-			 rename private functions to avoid conflicts with stb_image.h

-	  0.93 (2014-05-27)

-			 warning fixes

-	  0.92 (2010-08-01)

-			 casts to unsigned char to fix warnings

-	  0.91 (2010-07-17)

-			 first public release

-	  0.90   first internal release

-*/

-/*

-------------------------------------------------------------------------------

-This software is available under 2 licenses -- choose whichever you prefer.

-------------------------------------------------------------------------------

-ALTERNATIVE A - MIT License

-Copyright (c) 2017 Sean Barrett

-Permission is hereby granted, free of charge, to any person obtaining a copy of

-this software and associated documentation files (the "Software"), to deal in

-the Software without restriction, including without limitation the rights to

-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies

-of the Software, and to permit persons to whom the Software is furnished to do

-so, subject to the following conditions:

-The above copyright notice and this permission notice shall be included in all

-copies or substantial portions of the Software.

-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

-SOFTWARE.

-------------------------------------------------------------------------------

-ALTERNATIVE B - Public Domain (www.unlicense.org)

-This is free and unencumbered software released into the public domain.

-Anyone is free to copy, modify, publish, use, compile, sell, or distribute this

-software, either in source code form or as a compiled binary, for any purpose,

-commercial or non-commercial, and by any means.

-In jurisdictions that recognize copyright laws, the author or authors of this

-software dedicate any and all copyright interest in the software to the public

-domain. We make this dedication for the benefit of the public at large and to

-the detriment of our heirs and successors. We intend this dedication to be an

-overt act of relinquishment in perpetuity of all present and future rights to

-this software under copyright law.

-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

-AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN

-ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION

-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

-------------------------------------------------------------------------------

-*/

--- a/SDL_Examples/include/stringutil.h

+++ /dev/null

@@ -1,330 +1,0 @@

-#include <string.h>

-#include <stdlib.h>

-#include <stdio.h>

-#include <ctype.h>

-//Before we get on, "stringutil.h" is the most C-ish name for a source code file ever, amirite?

-#ifndef STRUTIL_ALLOC

-#define STRUTIL_ALLOC(s) malloc(s)

-#endif

-#ifndef STRUTIL_FREE

-#define STRUTIL_FREE(s) free(s)

-#endif

-#ifndef STRUTIL_REALLOC

-#define STRUTIL_REALLOC(s, t) realloc(s,t)

-#endif

-#ifndef STRUTIL_NO_SHORT_NAMES

-#define strcata strcatalloc

-#define strcataf1 strcatallocf1

-#define strcataf2 strcatallocf2

-#define strcatafb strcatallocfb

-#endif

-//Strcat but with malloc.

-static inline char* strcatalloc(const char* s1, const char* s2){

-	char* d = NULL; d = STRUTIL_ALLOC(strlen(s1) + strlen(s2) + 1);

-	if(d){

-		strcpy(d, s1);

-		strcat(d, s2);

-	}

-	return d;

-}

-//Free the first argument.

-static inline char* strcatallocf1(char* s1, const char* s2){

-	char* d = STRUTIL_REALLOC(s1, strlen(s1) + strlen(s2) + 1);

-	//char* d = NULL; d = STRUTIL_ALLOC(strlen(s1) + strlen(s2) + 1);

-	if(d){

-		//strcpy(d, s1);

-		strcat(d, s2);

-	}

-	//STRUTIL_FREE(s1);

-	return d;

-}

-//Free the second argument.

-static inline char* strcatallocf2(const char* s1, char* s2){

-	char* d = NULL; d = STRUTIL_ALLOC(strlen(s1) + strlen(s2) + 1);

-	if(d){

-		strcpy(d, s1);

-		strcat(d, s2);

-	}

-	STRUTIL_FREE(s2);

-	return d;

-}

-//Free both arguments

-static inline char* strcatallocfb(char* s1, char* s2){

-	char* d = NULL; d = STRUTIL_ALLOC(strlen(s1) + strlen(s2) + 1);

-	if(d){

-		strcpy(d, s1);

-		strcat(d, s2);

-	}

-	STRUTIL_FREE(s1);

-	STRUTIL_FREE(s2);

-	return d;

-}

-//Convert a non-null-terminated URL into a null terminated one.

-static inline char* str_null_terminated_alloc(const char* in, unsigned int len){

-	char* d = NULL; d = malloc(len+1);

-	if(d){

-		memcpy(d,in,len);

-		d[len] = '\0';

-	}

-	return d;

-}

-static inline unsigned int strprefix(const char *pre, const char *str)

-{

-    size_t lenpre = strlen(pre),

-           lenstr = strlen(str);

-    return lenstr < lenpre ? 0 : memcmp(pre, str, lenpre) == 0;

-}

-//Someone once said sub-string search was an O(n^2) algorithm. What the hell?

-static inline long long strfind(const char* text, const char* subtext){

-	long long ti = 0;

-	long long si = 0;

-	long long st = strlen(subtext);

-	for(;text[ti] != '\0';ti++){

-		if(text[ti] == subtext[si]) {

-			si++;

-			if(subtext[si] == '\0') return (ti - st)+1;

-		}else {

-			si = 0;

-			if(subtext[si] == '\0') return (ti - st);

-		}

-	}

-	return -1;

-}

-//Read file until terminator character is found.

-//Returns the number of characters copied.

-static inline unsigned long long read_until_terminator(FILE* f, char* buf, const unsigned long long buflen, char terminator){

-	unsigned long long i = 0;

-	char c;

-	for(i = 0; i < (buflen-1); i++)

-	{

-		if(feof(f))break;

-		c = fgetc(f);

-		if(c == terminator)break;

-		buf[i] = c;

-	}

-	buf[buflen-1] = '\0'; //READ_UNTIL_TERMINATOR ALWAYS RETURNS A VALID STRING!

-	return i;

-}

-//Same as above but allocates memory to guarantee it can hold the entire thing. Grows naturally.

-static inline char* read_until_terminator_alloced(FILE* f, unsigned long long* lenout, char terminator, unsigned long long initsize){

-	char c;

-	char* buf = STRUTIL_ALLOC(initsize);

-	if(!buf) return NULL;

-	unsigned long long bcap = initsize;

-	unsigned long long blen = 0;

-	while(1){

-		if(feof(f)){break;}

-		c = fgetc(f);

-		if(c == terminator) {break;}

-		if(blen == (bcap-1))	//Grow the buffer.

-			{

-				bcap<<=1;

-				char* bufold = buf;

-				buf = STRUTIL_REALLOC(buf, bcap);

-				if(!buf){free(bufold); return NULL;}

-			}

-		buf[blen++] = c;

-	}

-	buf[blen] = '\0'; //READ_UNTIL_TERMINATOR ALWAYS RETURNS A VALID STRING!

-	*lenout = blen;

-	return buf;

-}

-static inline void* read_file_into_alloced_buffer(FILE* f, unsigned long long* len){

-	void* buf = NULL;

-	if(!f) return NULL;

-	fseek(f, 0, SEEK_END);

-	*len = ftell(f);

-	fseek(f,0,SEEK_SET);

-	buf = STRUTIL_ALLOC(*len + 1);

-	if(!buf) return NULL;

-	fread(buf, 1, *len, f);

-	((char*)buf)[*len] = '\0';

-	return buf;

-}

-//GEK'S SIMPLE TEXT COMPRESSION SCHEMA

-/*LIMITATIONS

-* Token names must be alphabetic (a-z, A-Z)

-* The token mark must be escaped with a backslash.

-* Token names which are substrings of other ones must be listed later

-*/

-static inline char* strencodealloc(const char* inbuf, const char** tokens, unsigned long long ntokens, char esc, char tokmark){

-	unsigned long long lenin = strlen(inbuf);

-	char c; unsigned long long i = 0;

-	char c_str[512] = {0}; //We are going to be sprintf-ing to this buffer.

-	char* out = NULL;

-	c_str[0] = esc;

-	c_str[1] = tokmark;

-	out = strcatalloc(c_str, "");

-	c_str[0] = 0;

-	c_str[1] = 0;

-	//Write out all the token entries. format is namelength~definition

-	for(unsigned long long j = 0; j < ntokens; j++){

-		out = strcataf1(out, tokens[2*j]);

-		//Write out the length of the token.

-		snprintf(c_str, 512, "%llu", (unsigned long long)strlen(tokens[2*j+1]));

-		out = strcataf1(out, c_str);

-		c_str[0] = tokmark;

-		c_str[1] = 0;

-		out = strcataf1(out, c_str);

-		out = strcataf1(out, tokens[2*j+1]);

-	}

-	c_str[0] = esc;

-	c_str[1] = 0;

-	out = strcataf1(out, c_str);

-	//We have now created the header. Now to begin encoding the text.

-	for(i=0; i<lenin; i++){

-		for(unsigned long long t = 0; t < ntokens; t++) //t- the token we are processing.

-			if(strprefix(tokens[t*2+1], inbuf+i)){ //Matched at least one

-				unsigned long long howmany = 1;

-				unsigned long long curtoklen = strlen(tokens[t*2+1]); //Length of the current token we are counting

-				for(unsigned long long h=1;i+h*curtoklen < lenin;h++){

-					if(strprefix(tokens[t*2+1], inbuf+i+h*curtoklen))

-						{howmany++;}

-					else

-						break; //The number of these things is limited.

-				}

-				//We know what token and how many, write it to out

-				c_str[0] = tokmark;

-				c_str[1] = 0;

-				out = strcataf1(out, c_str);

-				if(howmany > 1){

-					snprintf(c_str, 512, "%llu", (unsigned long long)howmany);

-					out = strcataf1(out, c_str);

-				}

-				out = strcataf1(out, tokens[t*2]);

-				i+=howmany*curtoklen;

-				continue;

-			}

-		//Test if we need to escape a sequence.

-		if(inbuf[i] == esc || inbuf[i] == tokmark){

-			c_str[0] = esc;

-			c_str[1] = 0;

-			out = strcataf1(out, c_str);

-		}

-		//We were unable to find a match, just write the character out.

-		c_str[0] = inbuf[i];

-		c_str[1] = 0;

-		out = strcataf1(out, c_str);

-	}

-	return out;

-}

-static inline char* strdecodealloc(char* inbuf){

-	unsigned long long lenin = strlen(inbuf);

-	if(lenin < 3) {

-		//puts("\nToo Short!\n");

-		return NULL;

-	}

-	char esc = inbuf[0]; //The escape character is the first one.

-	char tokmark = inbuf[1]; //Begin token character.

-	//printf("Escape is %c, tokmark is %c\n", esc, tokmark);

-	char c; unsigned long long i = 2;

-	char c_str[2] = {0,0};

-	//Our decoded text.

-	char* out = strcatalloc("","");

-	//Tokens for replacement, even is the token,

-	//odd is its definition

-	char** tokens = NULL;

-	//unsigned long long* toklens = NULL;

-	unsigned long long ntokens = 0;

-//#define {if(i <= lenin) c = inbuf[i++]; else {goto end;}} {if(i <= lenin) c = inbuf[i++]; else {goto end;}}

-	//Retrieve the tokens.

-	{if(i <= lenin) c = inbuf[i++]; else {goto end;}}; //has to occur before the loop.

-	while(c != esc){	ntokens++;

-		tokens = STRUTIL_REALLOC(tokens, ntokens * 2 * sizeof(char*));

-		//toklens = STRUTIL_REALLOC(toklens, ntokens * sizeof(unsigned long long));

-		//toklens[ntokens-1] = 0;

-		tokens[(ntokens-1)*2] = strcatalloc("","");

-		tokens[(ntokens-1)*2+1] = strcatalloc("","");

-		//name of token is tokens[(ntokens-1)*2] and its definition is tokens[(ntokens-1)*2+1]

-		//Get the name of the token.

-		if(!isalpha(c)) goto end;	//Error! Can't have Break out.

-		while(isalpha(c)){

-			c_str[0] = c;

-			tokens[(ntokens-1)*2] = strcatallocf1(tokens[(ntokens-1)*2], c_str);

-			{if(i <= lenin) c = inbuf[i++]; else {goto end;}};

-		}

-		//The last retrieve() got us the first digit of the token length.

-		//Get the length of the token

-		unsigned long long l = 0;

-		if(!isdigit(c)) goto end;

-		while(isdigit(c) && c!=tokmark){

-			c_str[0] = c;

-			l *= 10;

-			l += atoi(c_str);

-			{if(i <= lenin) c = inbuf[i++]; else {goto end;}};

-		}

-		//toklens[ntokens-1] = l;

-		//We have the name of the token and its length, the last {if(i <= lenin) c = inbuf[i++]; else {goto end;}} got us the token character (~ in my example)

-		//Now we can grab the token definition.

-		for(unsigned long long vv = 0; vv < l; vv++){

-			{if(i <= lenin) c = inbuf[i++]; else {goto end;}};

-			c_str[0] = c;

-			tokens[(ntokens-1)*2+1] = strcatallocf1(tokens[(ntokens-1)*2+1], c_str);

-		}

-		{if(i <= lenin) c = inbuf[i++]; else {goto end;}};

-	//	printf("\nTOKEN %s IS %s, length %llu",tokens[(ntokens-1)*2] ,tokens[(ntokens-1)*2+1], l);

-	}

-	//puts("\nREACHED ESCAPE CHARACTER.");

-	//Now we attempt to build our string

-	{if(i <= lenin) c = inbuf[i++]; else {goto end;}};

-	long long doescape = 0;

-	while(i<=lenin){

-		if(!doescape && c==esc){

-			doescape=1;{if(i <= lenin) c = inbuf[i++]; else {goto end;}};continue;

-		}

-		if(!doescape && c==tokmark){

-			//Handle digits prefixing a token.

-			unsigned long long l = 0;

-			{if(i <= lenin) c = inbuf[i++]; else {goto end;}};

-			if(isdigit(c))

-				while(isdigit(c)){

-					c_str[0] = c;

-					l *= 10;

-					l += atoi(c_str);

-					{if(i <= lenin) c = inbuf[i++]; else {goto end;}};

-				}

-			else {l=1;}

-			i--;

-			for(unsigned long long t = 0; t < ntokens; t++)

-				if(strprefix(tokens[t*2], inbuf+i)){

-					//MATCH!

-					for(unsigned long long q = 0; q < l; q++)

-						out = strcatallocf1(out, tokens[t*2+1]);

-					i+=strlen(tokens[t*2]);

-					break; //break out of the for.

-				}

-			if(i<=lenin) {if(i <= lenin) c = inbuf[i++]; else {goto end;}};

-			continue;

-		}else{

-			c_str[0] = c;

-			out = strcatallocf1(out, c_str);

-			doescape = 0;

-			{if(i <= lenin) c = inbuf[i++]; else {goto end;}};

-		}

-	}

-	end:

-	if(tokens){

-		for(unsigned long long j = 0; j < ntokens; j++)

-			{STRUTIL_FREE(tokens[j*2]);STRUTIL_FREE(tokens[j*2+1]);}

-		STRUTIL_FREE(tokens);

-	}

-	//if(toklens)STRUTIL_FREE(toklens);

-	return out;

-}

--- a/SDL_Examples/include/tobjparse.h

+++ /dev/null

@@ -1,364 +1,0 @@

-/* Public Domain / CC0 3d OBJ Parser

-With support for Per Vertex Color (VC) Lines.

-Written by Gek (DMHSW) in 2020

-*/

-#ifndef TOBJ_PARSE_H

-#define TOBJ_PARSE_H

-#include "3dMath.h"

-#include <stdio.h>

-#include <string.h>

-#include <stdlib.h>

-#include <ctype.h>

-typedef struct{

-	long long unsigned int p;

-	long long unsigned int n;

-	long long unsigned int tc;

-	long long unsigned int vc;

-}facedef;

-typedef struct{

-	unsigned int npos, nnorm, ntexcoords, ncolors, nfaces;

-	vec3* positions;

-	vec3* normals;

-	vec3* texcoords;

-	vec3* colors;

-	facedef* faces;

-}objraw;

-typedef struct{

-	int npoints; //Number of points.

-	vec3* d; //Triangles (Same winding as in the file)

-	vec3* n; //Normals

-	vec3* t; //Texture Cordinates

-	vec3* c; //colors

-}model;

-objraw initobjraw(){

-	return (objraw){

-		.npos=0,

-		.nnorm=0,

-		.ntexcoords=0,

-		.ncolors=0,

-		.nfaces=0,

-		.positions=NULL,

-		.normals=NULL,

-		.texcoords=NULL,

-		.faces=NULL

-	};

-}

-model initmodel(){

-	return (model){

-		.npoints=0,

-		.d=NULL,

-		.n=NULL,

-		.t=NULL,

-		.c=NULL

-	};

-}

-void freeobjraw(objraw* o){

-	free(o->positions);

-	free(o->texcoords);

-	free(o->normals);

-	free(o->colors);

-	free(o->faces);

-}

-void freemodel(model* o){

-	free(o->d);

-	free(o->t);

-	free(o->n);

-	free(o->c);

-}

-model tobj_tomodel(objraw* raw){

-	if(!raw || raw->faces == NULL)

-	{

-		puts("\nAttempted to convert empty model... Aborting...\n");

-		return initmodel();

-	}

-	model ret = initmodel();

-	ret.npoints = 0;

-	ret.d= malloc(sizeof(vec3) * raw->nfaces);

-	if(raw->normals)ret.n=malloc(sizeof(vec3) * raw->nfaces);

-	if(raw->texcoords)ret.t=malloc(sizeof(vec3) * raw->nfaces);

-	if(raw->colors)ret.c=malloc(sizeof(vec3) * raw->nfaces);

-	long long unsigned int piter = 0;

-	long long unsigned int niter = 0;

-	long long unsigned int titer = 0;

-	long long unsigned int citer = 0;

-	//printf("\nsb_count of faces is %d",sb_count(raw->faces));

-	for(long long int i = 0; i < raw->nfaces;i++){

-		//printf("\n::%lld:: 0\n",i);

-		long long unsigned int p = raw->faces[i].p-1;

-		long long unsigned int n = raw->faces[i].n-1;

-		long long unsigned int t = raw->faces[i].tc-1;

-		long long unsigned int c = raw->faces[i].vc-1;

-		if(p < (long long unsigned int)raw->npos){

-			//sb_push(ret.d, raw->positions[p]);

-			ret.d[piter++] = raw->positions[p];

-			ret.npoints++;

-		} else {

-			printf("\nBad Data!!!\n");

-			printf("p=%llu n=%llu t=%llu c=%llu i=%lld\n\n",p,n,t,c,i);

-		}

-		if(raw->normals){

-			if(n < (long long unsigned int)raw->nnorm){

-				//sb_push(ret.n, raw->normals[n]);

-				ret.n[niter++] = raw->normals[n];

-			} else {

-				puts("\n<BAD DATA>, NORMALS\n");

-			}

-		}

-		if(raw->texcoords){

-			if(t < (long long unsigned int)raw->ntexcoords){

-				//sb_push(ret.t, raw->texcoords[t]);

-				ret.t[titer++] = raw->texcoords[t];

-			} else

-				puts("\n<BAD DATA>, TEXCOORDS\n");

-		}

-		if(raw->colors){

-			if(c < (long long unsigned int)raw->ncolors){

-				//sb_push(ret.c, raw->colors[c]);

-				ret.c[citer++] = raw->colors[c];

-			} else {

-				puts("\n<BAD DATA>, COLORS\n");

-			}

-		}

-	}

-	//printf("\ntobj_tomodel completed.\n");

-	if(ret.npoints != piter){

-		printf("\nBAD DATA!!! ABORTING...\n");

-		exit(1);

-	}

-	return ret;

-}

-//Only loads

-objraw tobj_load(const char* fn){

-	FILE* f;

-	f = fopen(fn, "r");

-	objraw retval = initobjraw();

-	if(f){

-		char line[2048];line[2047]=0;

-		//int read = 0;

-#define TOBJ_PUSH(type, vec, n, val){vec = realloc(vec, sizeof(type) * (n+1)); vec[n++] = val;}

-		while(fgets(line, 2047, f)){

-			vec3 val;

-			facedef frick0;

-			facedef frick1;

-			facedef frick2;

-			if(line[0] == 'v' && line[1] == ' ' && (strlen(line) > 4)){

-				//read = sscanf(line,"v %f %f %f",&val.d[0],&val.d[1],&val.d[2]);

-				//printf("\nv Read: %d",read);

-				//sb_push(retval.positions, val);

-				TOBJ_PUSH(vec3, retval.positions, retval.npos, val);

-				char* t = line+2;

-				//sb_last(retval.positions).d[0] = atof(t);

-				retval.positions[retval.npos-1].d[0] = atof(t);

-				while(!isspace(*t) && *t != '\0')t++;

-				if(*t == '\0')continue;

-				t++;

-				//sb_last(retval.positions).d[1] = atof(t);

-				retval.positions[retval.npos-1].d[1] = atof(t);

-				while(!isspace(*t) && *t != '\0')t++;

-				if(*t == '\0')continue;

-				t++;

-				//sb_last(retval.positions).d[2] = atof(t);

-				retval.positions[retval.npos-1].d[2] = atof(t);

-			}

-			if(line[0] == 'v' && line[1] == 't' && (strlen(line) > 4)){

-				//read = sscanf(line,"vt %f %f",&val.d[0],&val.d[1]);

-				//sb_push(retval.texcoords, val);

-				TOBJ_PUSH(vec3, retval.texcoords, retval.ntexcoords, val);

-				char* t = line+3;

-				//sb_last(retval.texcoords).d[0] = atof(t);

-				retval.texcoords[retval.ntexcoords-1].d[0] = atof(t);

-				while(!isspace(*t) && *t != '\0')t++;

-				if(*t == '\0')continue;

-				t++;

-				//sb_last(retval.texcoords).d[1] = -atof(t);

-				retval.texcoords[retval.ntexcoords-1].d[1] = -atof(t);

-			}

-			if(line[0] == 'v' && line[1] == 'c' && (strlen(line) > 4)){

-				//read=sscanf(line,"vc %f %f %f",&val.d[0],&val.d[1],&val.d[2]);

-				//sb_push(retval.colors, val);

-				TOBJ_PUSH(vec3, retval.colors, retval.ncolors, val);

-				char* t = line+3;

-				//sb_last(retval.colors).d[0] = atof(t);

-				retval.colors[retval.ncolors-1].d[0] = atof(t);

-				while(!isspace(*t) && *t != '\0')t++;

-				if(*t == '\0')continue;

-				t++;

-				//sb_last(retval.colors).d[1] = atof(t);

-				retval.colors[retval.ncolors-1].d[1] = atof(t);

-				while(!isspace(*t) && *t != '\0')t++;

-				if(*t == '\0')continue;

-				t++;

-				//sb_last(retval.colors).d[2] = atof(t);

-				retval.colors[retval.ncolors-1].d[2] = atof(t);

-				//printf("\nvc Read: %d",read);

-			}

-			if(line[0] == 'v' && line[1] =='n' && (strlen(line) > 4)){

-				//read=sscanf(line,"vn %f %f %f",&val.d[0],&val.d[1],&val.d[2]);

-				//printf("\nn Read: %d",read);

-				//sb_push(retval.normals, val);

-				TOBJ_PUSH(vec3, retval.normals, retval.nnorm, val);

-				char* t = line+3;

-				//sb_last(retval.normals).d[0] = atof(t);

-				retval.normals[retval.nnorm-1].d[0] = atof(t);

-				while(!isspace(*t) && *t != '\0')t++;

-				if(*t == '\0')continue;

-				t++;

-				//sb_last(retval.normals).d[1] = atof(t);

-				retval.normals[retval.nnorm-1].d[1] = atof(t);

-				while(!isspace(*t) && *t != '\0')t++;

-				if(*t == '\0')continue;

-				t++;

-				//sb_last(retval.normals).d[2] = atof(t);

-				retval.normals[retval.nnorm-1].d[2] = atof(t);

-			}

-			if(line[0] == 'f' && (strlen(line) > 4)){

-				//The face lines are hard to parse.

-				//They could be p p p

-				// or p/vt p/vt p/vt

-				//or p//n p//n p//n

-				//or p/vt/n p/vt/n p/vt/n

-				//or p/vt/n/c p/vt/n/c p/vt/n/c

-				//or some other combination, as long as it has p it's valid

-				//Grab the position indices

-				{

-					char* t = line+2;

-					frick0.p = strtoull(t,NULL,10);

-					//printf("\nf[0].p is %llu",frick0.p);

-					while(!isspace(*t) && *t != '\0')t++;

-					if(*t == '\0')continue;

-					t++;

-					frick1.p = strtoull(t,NULL,10);

-					//printf("\nf[1].p is %llu",frick1.p);

-					while(!isspace(*t) && *t != '\0')t++;

-					if(*t == '\0')continue;

-					t++;

-					frick2.p = strtoull(t,NULL,10);

-					//printf("\nf[2].p is %llu",frick2.p);

-				}

-				//Grab the texture coordinates (First character after first slash!)

-				if(retval.texcoords){

-					char* t = line+2;

-					while(!isspace(*t) && *t != '\0' && *t != '/')t++; //jump to first slash

-					t++;//next character

-					frick0.tc = strtoull(t,NULL,10);

-					//printf("\nf[0].tc is %llu",frick0.tc);

-					//Jump to the next group of numbers

-					while(!isspace(*t) && *t != '\0')t++;

-					if(*t == '\0')continue;

-					t++;

-					//first slash

-					while(!isspace(*t) && *t != '\0' && *t != '/')t++; //jump to next slash

-					t++;//next character

-					frick1.tc = strtoull(t,NULL,10);

-					//printf("\nf[1].tc is %llu",frick1.tc);

-					//Jump to the next group of numbers

-					while(!isspace(*t) && *t != '\0')t++;

-					if(*t == '\0')continue;

-					t++;

-					//first slash

-					while(!isspace(*t) && *t != '\0' && *t != '/')t++; //jump to next slash

-					t++;//next character

-					frick2.tc = strtoull(t,NULL,10);

-					//printf("\nf[2].tc is %llu",frick2.tc);

-				}

-				if(retval.normals){

-					char* t = line+2;

-					while(!isspace(*t) && *t != '\0' && *t != '/')t++; //jump to first slash

-					t++;//next character

-					while(!isspace(*t) && *t != '\0' && *t != '/')t++; //jump to second slash

-					t++;//next character

-					frick0.n = strtoull(t,NULL,10);

-					//Jump to the next group of numbers

-					while(!isspace(*t) && *t != '\0')t++;

-					if(*t == '\0')continue;

-					t++;

-					//first slash

-					while(!isspace(*t) && *t != '\0' && *t != '/')t++; //jump to first slash

-					t++;//next character

-					while(!isspace(*t) && *t != '\0' && *t != '/')t++; //jump to second slash

-					t++;//next character

-					frick1.n = strtoull(t,NULL,10);

-					//Jump to the next group of numbers

-					while(!isspace(*t) && *t != '\0')t++;

-					if(*t == '\0')continue;

-					t++;

-					//first slash

-					while(!isspace(*t) && *t != '\0' && *t != '/')t++; //jump to first slash

-					t++;//next character

-					while(!isspace(*t) && *t != '\0' && *t != '/')t++; //jump to second slash

-					t++;//next character

-					frick2.n = strtoull(t,NULL,10);

-				}

-				if(retval.colors){

-					char* t = line+2;

-					while(!isspace(*t) && *t != '\0' && *t != '/')t++; //jump to first slash

-					t++;//next character

-					while(!isspace(*t) && *t != '\0' && *t != '/')t++; //jump to second slash

-					t++;//next character

-					while(!isspace(*t) && *t != '\0' && *t != '/')t++; //jump to third slash

-					t++;//next character

-					frick0.vc = strtoull(t,NULL,10);

-					//printf("\nf[0].vc is %llu",frick0.vc);

-					//Jump to the next group of numbers

-					while(!isspace(*t) && *t != '\0')t++;

-					if(*t == '\0')continue;

-					t++;

-					//first slash

-					while(!isspace(*t) && *t != '\0' && *t != '/')t++; //jump to first slash

-					t++;//next character

-					while(!isspace(*t) && *t != '\0' && *t != '/')t++; //jump to second slash

-					t++;//next character

-					while(!isspace(*t) && *t != '\0' && *t != '/')t++; //jump to third slash

-					t++;//next character

-					frick1.vc = strtoull(t,NULL,10);

-					//printf("\nf[1].vc is %llu",frick1.vc);

-					//Jump to the next group of numbers

-					while(!isspace(*t) && *t != '\0')t++;

-					if(*t == '\0')continue;

-					t++;

-					//first slash

-					while(!isspace(*t) && *t != '\0' && *t != '/')t++; //jump to first slash

-					t++;//next character

-					while(!isspace(*t) && *t != '\0' && *t != '/')t++; //jump to second slash

-					t++;//next character

-					while(!isspace(*t) && *t != '\0' && *t != '/')t++; //jump to third slash

-					t++;//next character

-					frick2.vc = strtoull(t,NULL,10);

-					//printf("\nf[2].vc is %llu",frick2.vc);

-				}

-				//sb_push(retval.faces,frick0);

-				TOBJ_PUSH(facedef, retval.faces, retval.nfaces, frick0);

-				//sb_push(retval.faces,frick1);

-				TOBJ_PUSH(facedef, retval.faces, retval.nfaces, frick1);

-				//sb_push(retval.faces,frick2);

-				TOBJ_PUSH(facedef, retval.faces, retval.nfaces, frick2);

-				/*

-				printf("\nReading from sb, frick0.p=%llu frick0.n=%llu frick0.tc=%llu frick0.vc=%llu",

-					retval.faces[sb_count(retval.faces)-3].p,

-					retval.faces[sb_count(retval.faces)-3].n,

-					retval.faces[sb_count(retval.faces)-3].tc,

-					retval.faces[sb_count(retval.faces)-3].vc

-				);*/

-			}

-		}

-	fclose(f);

-	} else {

-		printf("\nUnable to load file %s\n",fn);

-	}

-	return retval;

-}

-#endif

--- a/SDL_Examples/menu.c

+++ b/SDL_Examples/menu.c

@@ -17,9 +17,9 @@

 #include "../include/zbuffer.h"

 #define CHAD_API_IMPL

 #define CHAD_MATH_IMPL

-#include "include/3dMath.h"

+#include "../include-demo/3dMath.h"

 #ifdef PLAY_MUSIC

-#include "include/api_audio.h"

+#include "../include-demo/api_audio.h"

 #else

 typedef unsigned char uchar;

 #endif

@@ -27,7 +27,7 @@

 //Gek's OpenIMGUI standard.

 #define OPENIMGUI_IMPL

-#include "include/openimgui.h"

+#include "../include-demo/openimgui.h"

 #ifndef M_PI

--- a/SDL_Examples/model.c

+++ b/SDL_Examples/model.c

@@ -13,14 +13,14 @@

 #include "../include/GL/gl.h"

 #define STB_IMAGE_IMPLEMENTATION

-#include "include/stb_image.h"

+#include "../include-demo/stb_image.h"

 #define CHAD_MATH_IMPL

-#include "include/3dMath.h"

-#include "include/tobjparse.h"

+#include "../include-demo/3dMath.h"

+#include "../include-demo/tobjparse.h"

 #define CHAD_API_IMPL

 #include "../include/zbuffer.h"

 #ifdef PLAY_MUSIC

-#include "include/api_audio.h"

+#include "../include-demo/api_audio.h"

 #else

 typedef unsigned char uchar;

 #endif

--- a/SDL_Examples/texture.c

+++ b/SDL_Examples/texture.c

@@ -15,14 +15,14 @@

 #include "../include/zbuffer.h"

 #define CHAD_API_IMPL

 #define CHAD_MATH_IMPL

-#include "include/3dMath.h"

+#include "../include-demo/3dMath.h"

 #ifdef PLAY_MUSIC

-#include "include/api_audio.h"

+#include "../include-demo/api_audio.h"

 #else

 typedef unsigned char uchar;

 #endif

 #define STB_IMAGE_IMPLEMENTATION

-#include "include/stb_image.h"

+#include "../include-demo/stb_image.h"

 #include <SDL/SDL.h>

 int noSDL = 0;

 int doPostProcess = 0;

--- /dev/null

+++ b/include-demo/3dMath.h

@@ -1,0 +1,554 @@

+/* Public Domain / CC0 C99 Vector Math Library

+*/

+#ifndef CHAD_MATH_H

+#define CHAD_MATH_H

+//#define CHAD_MATH_NO_ALIGN

+#ifndef CHAD_MATH_NO_ALIGN

+#include <stdalign.h>

+#define CHAD_ALIGN alignas(16)

+#else

+#define CHAD_ALIGN /*a comment*/

+#endif

+#include <math.h>

+#include <string.h>

+typedef float f_;

+typedef unsigned int uint;

+#define MAX(x,y) (x>y?x:y)

+#define MIN(x,y) (x<y?x:y)

+typedef struct {CHAD_ALIGN f_ d[3];} vec3;

+typedef struct {CHAD_ALIGN int d[3];} ivec3;

+typedef struct {CHAD_ALIGN f_ d[4];} vec4;

+typedef struct {CHAD_ALIGN f_ d[16];} mat4;

+//Collision detection

+//These Algorithms return the penetration vector into

+//the shape in the first argument

+//With depth of penetration in element 4

+//if depth of penetration is zero or lower then there is no penetration.

+typedef struct{

+	vec4 c;

+	vec3 e;

+}aabb;

+typedef aabb colshape; //c.d[3] determines if it's a sphere or box. 0 or less = box, greater than 0 = sphere

+static inline vec4 getrow( mat4 a,  uint index){

+	return (vec4){

+		.d[0]=a.d[0*4+index],

+		.d[1]=a.d[1*4+index],

+		.d[2]=a.d[2*4+index],

+		.d[3]=a.d[3*4+index]

+	};

+}

+static inline mat4 swapRowColumnMajor( mat4 in){

+	mat4 result;

+	vec4 t;

+	int i = 0;

+	t = getrow(in,i);

+	memcpy(result.d+i*4, t.d, 4*4);i++;

+	t = getrow(in,i);

+	memcpy(result.d+i*4, t.d, 4*4);i++;

+	t = getrow(in,i);

+	memcpy(result.d+i*4, t.d, 4*4);i++;

+	t = getrow(in,i);

+	memcpy(result.d+i*4, t.d, 4*4);

+	return result;

+}

+static inline vec4 getcol( mat4 a,  uint index){

+	return (vec4){

+		.d[0]=a.d[index*4+0],

+		.d[1]=a.d[index*4+1],

+		.d[2]=a.d[index*4+2],

+		.d[3]=a.d[index*4+3]

+	};

+}

+static inline mat4 scalemat4( vec4 s){

+	mat4 ret;

+	for(int i = 1; i < 16; i++)

+		ret.d[i]= 0.0;

+	ret.d[0*4 + 0] = s.d[0]; //x scale

+	ret.d[1*4 + 1] = s.d[1]; //y scale

+	ret.d[2*4 + 2] = s.d[2]; //z scale

+	ret.d[3*4 + 3] = s.d[3]; //w scale

+	return ret;

+}

+static inline int invmat4( mat4 m, mat4* invOut) //returns 1 if successful

+{

+    mat4 inv;

+    f_ det;

+    int i;

+    inv.d[0] = m.d[5]  * m.d[10] * m.d[15] -

+             m.d[5]  * m.d[11] * m.d[14] -

+             m.d[9]  * m.d[6]  * m.d[15] +

+             m.d[9]  * m.d[7]  * m.d[14] +

+             m.d[13] * m.d[6]  * m.d[11] -

+             m.d[13] * m.d[7]  * m.d[10];

+    inv.d[4] = -m.d[4]  * m.d[10] * m.d[15] +

+              m.d[4]  * m.d[11] * m.d[14] +

+              m.d[8]  * m.d[6]  * m.d[15] -

+              m.d[8]  * m.d[7]  * m.d[14] -

+              m.d[12] * m.d[6]  * m.d[11] +

+              m.d[12] * m.d[7]  * m.d[10];

+    inv.d[8] = m.d[4]  * m.d[9] * m.d[15] -

+             m.d[4]  * m.d[11] * m.d[13] -

+             m.d[8]  * m.d[5] * m.d[15] +

+             m.d[8]  * m.d[7] * m.d[13] +

+             m.d[12] * m.d[5] * m.d[11] -

+             m.d[12] * m.d[7] * m.d[9];

+    inv.d[12] = -m.d[4]  * m.d[9] * m.d[14] +

+               m.d[4]  * m.d[10] * m.d[13] +

+               m.d[8]  * m.d[5] * m.d[14] -

+               m.d[8]  * m.d[6] * m.d[13] -

+               m.d[12] * m.d[5] * m.d[10] +

+               m.d[12] * m.d[6] * m.d[9];

+    inv.d[1] = -m.d[1]  * m.d[10] * m.d[15] +

+              m.d[1]  * m.d[11] * m.d[14] +

+              m.d[9]  * m.d[2] * m.d[15] -

+              m.d[9]  * m.d[3] * m.d[14] -

+              m.d[13] * m.d[2] * m.d[11] +

+              m.d[13] * m.d[3] * m.d[10];

+    inv.d[5] = m.d[0]  * m.d[10] * m.d[15] -

+             m.d[0]  * m.d[11] * m.d[14] -

+             m.d[8]  * m.d[2] * m.d[15] +

+             m.d[8]  * m.d[3] * m.d[14] +

+             m.d[12] * m.d[2] * m.d[11] -

+             m.d[12] * m.d[3] * m.d[10];

+    inv.d[9] = -m.d[0]  * m.d[9] * m.d[15] +

+              m.d[0]  * m.d[11] * m.d[13] +

+              m.d[8]  * m.d[1] * m.d[15] -

+              m.d[8]  * m.d[3] * m.d[13] -

+              m.d[12] * m.d[1] * m.d[11] +

+              m.d[12] * m.d[3] * m.d[9];

+    inv.d[13] = m.d[0]  * m.d[9] * m.d[14] -

+              m.d[0]  * m.d[10] * m.d[13] -

+              m.d[8]  * m.d[1] * m.d[14] +

+              m.d[8]  * m.d[2] * m.d[13] +

+              m.d[12] * m.d[1] * m.d[10] -

+              m.d[12] * m.d[2] * m.d[9];

+    inv.d[2] = m.d[1]  * m.d[6] * m.d[15] -

+             m.d[1]  * m.d[7] * m.d[14] -

+             m.d[5]  * m.d[2] * m.d[15] +

+             m.d[5]  * m.d[3] * m.d[14] +

+             m.d[13] * m.d[2] * m.d[7] -

+             m.d[13] * m.d[3] * m.d[6];

+    inv.d[6] = -m.d[0]  * m.d[6] * m.d[15] +

+              m.d[0]  * m.d[7] * m.d[14] +

+              m.d[4]  * m.d[2] * m.d[15] -

+              m.d[4]  * m.d[3] * m.d[14] -

+              m.d[12] * m.d[2] * m.d[7] +

+              m.d[12] * m.d[3] * m.d[6];

+    inv.d[10] = m.d[0]  * m.d[5] * m.d[15] -

+              m.d[0]  * m.d[7] * m.d[13] -

+              m.d[4]  * m.d[1] * m.d[15] +

+              m.d[4]  * m.d[3] * m.d[13] +

+              m.d[12] * m.d[1] * m.d[7] -

+              m.d[12] * m.d[3] * m.d[5];

+    inv.d[14] = -m.d[0]  * m.d[5] * m.d[14] +

+               m.d[0]  * m.d[6] * m.d[13] +

+               m.d[4]  * m.d[1] * m.d[14] -

+               m.d[4]  * m.d[2] * m.d[13] -

+               m.d[12] * m.d[1] * m.d[6] +

+               m.d[12] * m.d[2] * m.d[5];

+    inv.d[3] = -m.d[1] * m.d[6] * m.d[11] +

+              m.d[1] * m.d[7] * m.d[10] +

+              m.d[5] * m.d[2] * m.d[11] -

+              m.d[5] * m.d[3] * m.d[10] -

+              m.d[9] * m.d[2] * m.d[7] +

+              m.d[9] * m.d[3] * m.d[6];

+    inv.d[7] = m.d[0] * m.d[6] * m.d[11] -

+             m.d[0] * m.d[7] * m.d[10] -

+             m.d[4] * m.d[2] * m.d[11] +

+             m.d[4] * m.d[3] * m.d[10] +

+             m.d[8] * m.d[2] * m.d[7] -

+             m.d[8] * m.d[3] * m.d[6];

+    inv.d[11] = -m.d[0] * m.d[5] * m.d[11] +

+               m.d[0] * m.d[7] * m.d[9] +

+               m.d[4] * m.d[1] * m.d[11] -

+               m.d[4] * m.d[3] * m.d[9] -

+               m.d[8] * m.d[1] * m.d[7] +

+               m.d[8] * m.d[3] * m.d[5];

+    inv.d[15] = m.d[0] * m.d[5] * m.d[10] -

+              m.d[0] * m.d[6] * m.d[9] -

+              m.d[4] * m.d[1] * m.d[10] +

+              m.d[4] * m.d[2] * m.d[9] +

+              m.d[8] * m.d[1] * m.d[6] -

+              m.d[8] * m.d[2] * m.d[5];

+    det = m.d[0] * inv.d[0] + m.d[1] * inv.d[4] + m.d[2] * inv.d[8] + m.d[3] * inv.d[12];

+    if (det == 0)

+        return 0;

+    det = 1.0 / det;

+    for (i = 0; i < 16; i++)

+        invOut->d[i] = inv.d[i] * det;

+    return 1;

+}

+static inline mat4 perspective( f_ fov,  f_ aspect,  f_ near,  f_ far){

+	mat4 ret;

+	f_ D2R = 3.14159265358979323 / 180.0;

+	f_ yScale = 1.0/tanf(D2R * fov/2);

+	f_ xScale = yScale/aspect;

+	f_ nearmfar = near-far;

+	ret.d[0*4+0] = xScale; 	ret.d[0*4+1]=0; 	ret.d[0*4+2]=0;					ret.d[0*4+3]=0;

+	ret.d[1*4+0]=0; 		ret.d[1*4+1]=yScale;ret.d[1*4+2]=0;					ret.d[1*4+3]=0;

+	ret.d[2*4+0]=0; 		ret.d[2*4+1]=0;		ret.d[2*4+2]=(far+near)/nearmfar;ret.d[2*4+3]=-1;

+	ret.d[3*4+0]=0; 		ret.d[3*4+1]=0;		ret.d[3*4+2]=2*far*near/nearmfar;ret.d[3*4+3]=0;

+	/*

+	ret.d[0*4+0] = xScale; 	ret.d[0*4+1]=0; 	ret.d[0*4+2]=0;						ret.d[0*4+3]=0;

+	ret.d[1*4+0]=0; 		ret.d[1*4+1]=yScale;ret.d[1*4+2]=0;						ret.d[1*4+3]=0;

+	ret.d[2*4+0]=0; 		ret.d[2*4+1]=0;		ret.d[2*4+2]=(far+near)/nearmfar;	ret.d[2*4+3]=2*far*near/nearmfar;

+	ret.d[3*4+0]=0; 		ret.d[3*4+1]=0;		ret.d[3*4+2]=-1;					ret.d[3*4+3]=0;

+	*/

+	return ret;

+}

+static inline vec3 viewport( uint xdim,  uint ydim,  vec3 input){

+	input.d[0] += 1;

+	input.d[1] += 1;

+	input.d[0] *= (f_)xdim / 2.0;

+	input.d[1] *= (f_)ydim / 2.0;

+	input.d[2] = (input.d[2])/2.0;

+	return input;

+}

+static inline mat4 rotate( vec3 rotation){

+	f_ a = rotation.d[0];

+	f_ b = rotation.d[1];

+	f_ c = rotation.d[2];

+	mat4 rm;

+	rm.d[0*4 + 0] = cosf(a)*cosf(b);

+	rm.d[1*4 + 0] = sinf(a)*cosf(b);

+	rm.d[2*4 + 0] = -sinf(b);

+	rm.d[0*4 + 1] = cosf(a)*sinf(b)*sinf(c)-sinf(a)*cosf(c);

+	rm.d[1*4 + 1] = sinf(a)*sinf(b)*sinf(c)+cosf(a)*cosf(c);

+	rm.d[2*4 + 1] = cosf(b)*sinf(c);

+	rm.d[0*4 + 2] = cosf(a)*sinf(b)*cosf(c)+sinf(a)*sinf(c);

+	rm.d[1*4 + 2] = sinf(a)*sinf(b)*cosf(c)-cosf(a)*sinf(c);

+	rm.d[2*4 + 2] = cosf(b)*cosf(c);

+	//the other parts

+	rm.d[0*4 + 3] = 0;

+	rm.d[1*4 + 3] = 0;

+	rm.d[2*4 + 3] = 0;

+	rm.d[3*4 + 3] = 1; //the bottom right corner of the matrix.

+	rm.d[3*4 + 0] = 0;

+	rm.d[3*4 + 1] = 0;

+	rm.d[3*4 + 2] = 0;

+	return rm;

+}

+static inline f_ clampf( f_ a,  f_ min,  f_ max){

+	if(a<min) return min;

+	if(a>max) return max;

+	return a;

+}

+static inline f_ lengthv3( vec3 a){

+	return sqrtf(a.d[0] * a.d[0] + a.d[1] * a.d[1] + a.d[2] * a.d[2]);

+}

+static inline f_ lengthv4( vec4 a){

+	return sqrtf(a.d[0] * a.d[0] + a.d[1] * a.d[1] + a.d[2] * a.d[2] + a.d[3] * a.d[3]);

+}

+static inline vec3 multvec3( vec3 a,  vec3 b){

+	return (vec3){

+		.d[0]=a.d[0]*b.d[0],

+		.d[1]=a.d[1]*b.d[1],

+		.d[2]=a.d[2]*b.d[2]

+	};

+}

+static inline vec4 multvec4( vec4 a,  vec4 b){

+	return (vec4){

+		.d[0]=a.d[0]*b.d[0],

+		.d[1]=a.d[1]*b.d[1],

+		.d[2]=a.d[2]*b.d[2],

+		.d[3]=a.d[3]*b.d[3]

+	};

+}

+static inline vec3 clampvec3( vec3 a,  vec3 min,  vec3 max){

+	vec3 ret;

+	ret.d[0] = clampf(a.d[0],min.d[0],max.d[0]);

+	ret.d[1] = clampf(a.d[1],min.d[1],max.d[1]);

+	ret.d[2] = clampf(a.d[2],min.d[2],max.d[2]);

+	return ret;

+}

+static inline vec4 clampvec4( vec4 a,  vec4 min,  vec4 max){

+	vec4 ret;

+	ret.d[0] = clampf(a.d[0],min.d[0],max.d[0]);

+	ret.d[1] = clampf(a.d[1],min.d[1],max.d[1]);

+	ret.d[2] = clampf(a.d[2],min.d[2],max.d[2]);

+	ret.d[3] = clampf(a.d[3],min.d[3],max.d[3]);

+	return ret;

+}

+static inline f_ dotv3( vec3 a,  vec3 b){

+	return a.d[0] * b.d[0] + a.d[1] * b.d[1] + a.d[2] * b.d[2];

+}

+static inline f_ dotv4( vec4 a,  vec4 b){

+	return a.d[0] * b.d[0] + a.d[1] * b.d[1] + a.d[2] * b.d[2] + a.d[3] * b.d[3];

+}

+static inline mat4 multm4( mat4 a,  mat4 b){

+	mat4 ret;

+	for(int i = 0; i < 4; i++)

+	for(int j = 0; j < 4; j++)

+		ret.d[i*4 + j] = dotv4(

+			getrow(a, j),

+			getcol(b, i)

+		);

+	return ret;

+}

+static inline vec4 mat4xvec4( mat4 t,  vec4 v){

+	uint i = 0;

+	vec4 vr;

+	vr.d[0] = 	t.d[0*4+i] * v.d[0] +

+				t.d[1*4+i] * v.d[1] +

+				t.d[2*4+i] * v.d[2] +

+				t.d[3*4+i] * v.d[3];

+	i++;

+	vr.d[1] = 	t.d[0*4+i] * v.d[0] +

+				t.d[1*4+i] * v.d[1] +

+				t.d[2*4+i] * v.d[2] +

+				t.d[3*4+i] * v.d[3];

+	i++;

+	vr.d[2] = 	t.d[0*4+i] * v.d[0] +

+				t.d[1*4+i] * v.d[1] +

+				t.d[2*4+i] * v.d[2] +

+				t.d[3*4+i] * v.d[3];

+	i++;

+	vr.d[3] = 	t.d[0*4+i] * v.d[0] +

+				t.d[1*4+i] * v.d[1] +

+				t.d[2*4+i] * v.d[2] +

+				t.d[3*4+i] * v.d[3];

+	return vr;

+}

+static inline vec3 crossv3( vec3 a,  vec3 b){

+	vec3 retval;

+	retval.d[0] = a.d[1] * b.d[2] - a.d[2] * b.d[1];

+	retval.d[1] = a.d[2] * b.d[0] - a.d[0] * b.d[2];

+	retval.d[2] = a.d[0] * b.d[1] - a.d[1] * b.d[0];

+	return retval;

+}

+static inline vec3 scalev3( f_ s,  vec3 i){i.d[0] *= s; i.d[1] *= s; i.d[2] *= s; return i;}

+static inline vec4 scalev4( f_ s,  vec4 i){i.d[0] *= s; i.d[1] *= s; i.d[2] *= s;i.d[3] *= s; return i;}

+//Returns a scaled to unit length. A zero-length input yields (0,0,1).

+static inline vec3 normalizev3( vec3 a){

+	//Measure once: the length costs a sqrtf and is needed by both the test and the scale.

+	f_ len = lengthv3(a);

+	if(len==0) return (vec3){.d[0]=0.0,.d[1]=0.0,.d[2]=1.0};

+	return scalev3(1.0/len, a);

+}

+//Returns a scaled to unit length. A zero-length input yields (0,0,1,0).

+static inline vec4 normalizev4( vec4 a){

+	//Measure once: the length costs a sqrtf and is needed by both the test and the scale.

+	f_ len = lengthv4(a);

+	if(len==0) return (vec4){.d[0]=0.0,.d[1]=0.0,.d[2]=1.0,.d[3]=0.0};

+	return scalev4(1.0/len, a);

+}

+static inline vec3 addv3( vec3 aa,  vec3 b){

+	vec3 a = aa;

+	a.d[0] += b.d[0]; a.d[1] += b.d[1]; a.d[2] += b.d[2]; return a;

+}

+static inline vec3 rotatev3( vec3 in,  vec3 axis,  f_ ang){

+	vec3 t1 = scalev3(cosf(ang),in);

+	vec3 t2 = scalev3(sinf(ang),crossv3(axis,in));

+	vec3 t3 = scalev3((1-cosf(ang))*dotv3(axis,in),axis);

+	return addv3(t1,addv3(t2,t3));

+}

+static inline vec4 addv4( vec4 aa,  vec4 b){

+	vec4 a = aa;

+	a.d[0] += b.d[0]; a.d[1] += b.d[1]; a.d[2] += b.d[2]; a.d[3] += b.d[3]; return a;

+}

+static inline vec3 subv3( vec3 a,  vec3 b){

+	return addv3(a,scalev3(-1,b));

+}

+static inline mat4 identitymat4(){

+	return scalemat4(

+		(vec4){.d[0]=1.0,.d[1]=1.0,.d[2]=1.0,.d[3]=1.0}

+	);

+}

+static inline mat4 translate( vec3 t){

+	mat4 tm = identitymat4();

+	tm.d[3*4+0] = t.d[0];

+	tm.d[3*4+1] = t.d[1];

+	tm.d[3*4+2] = t.d[2];

+	return tm;

+}

+static inline vec4 subv4( vec4 a,  vec4 b){

+	return addv4(a,scalev4(-1,b));

+}

+static inline vec3 reflect( vec3 in,  vec3 norm){

+	return

+	addv3(in, //I +

+		scalev3(-2.0*dotv3(norm, in), //-2.0 * dotv3(norm,in) *

+			norm //N

+		)

+	);

+}

+static inline vec4 upv3( vec3 in,  f_ w){

+	return (vec4){

+		.d[0]=in.d[0],

+		.d[1]=in.d[1],

+		.d[2]=in.d[2],

+		.d[3]=w

+	};

+}

+static inline vec3 downv4( vec4 in){

+	return (vec3){

+		.d[0]=in.d[0],

+		.d[1]=in.d[1],

+		.d[2]=in.d[2]

+	};

+}

+static inline mat4 lookAt( vec3 eye,  vec3 at,  vec3 up){

+	mat4 cw = identitymat4();

+	vec3 zaxis = normalizev3(subv3(at,eye));

+	vec3 xaxis = normalizev3(crossv3(zaxis,up));

+	vec3 yaxis = crossv3(xaxis, zaxis);

+	zaxis = scalev3(-1,zaxis);

+	cw.d[0*4+0] = xaxis.d[0];

+	cw.d[1*4+0] = xaxis.d[1];

+	cw.d[2*4+0] = xaxis.d[2];

+	cw.d[3*4+0] = -dotv3(xaxis,eye);

+	cw.d[0*4+1] = yaxis.d[0];

+	cw.d[1*4+1] = yaxis.d[1];

+	cw.d[2*4+1] = yaxis.d[2];

+	cw.d[3*4+1] = -dotv3(yaxis,eye);

+	cw.d[0*4+2] = zaxis.d[0];

+	cw.d[1*4+2] = zaxis.d[1];

+	cw.d[2*4+2] = zaxis.d[2];

+	cw.d[3*4+2] = -dotv3(zaxis,eye);

+	cw.d[0*4+3] = 0;

+	cw.d[1*4+3] = 0;

+	cw.d[2*4+3] = 0;

+	cw.d[3*4+3] = 1;

+	return cw;

+}

+//Collision detection

+//These Algorithms return the penetration vector into

+//the shape in the first argument

+//With depth of penetration in element 4

+//if depth of penetration is zero or lower then there is no penetration.

+//Sphere-vs-sphere test. Both arguments are x,y,z,radius.

+//Returns the direction from s1's centre to s2's centre scaled to the overlap depth, with that depth in d[3].

+//An all-zero result means the spheres are apart or their centres coincide.

+static inline vec4 spherevsphere( vec4 s1,  vec4 s2){ //x,y,z,radius

+	vec3 diff = subv3(

+				downv4(s2),

+				downv4(s1)

+			);

+	float lv3 = lengthv3(diff);

+	float l = (s1.d[3] + s2.d[3]-lv3);

+	if(l < 0 || lv3 == 0) {

+		//Zero every lane so callers never read indeterminate x,y,z.

+		return (vec4){

+			.d[0]=0,

+			.d[1]=0,

+			.d[2]=0,

+			.d[3]=0

+		};

+	}

+	//diff/lv3 is the unit direction; multiplying by l gives the penetration vector.

+	return upv3(

+		scalev3(

+			l/lv3,diff

+		)

+		,l

+	);

+}

+static inline vec4 boxvbox( aabb b1,  aabb b2){ //Just points along the minimum separating axis, Nothing fancy.

+	vec4 ret = (vec4){

+		.d[0]=0,

+		.d[1]=0,

+		.d[2]=0,

+		.d[3]=0

+	};

+	vec3 sumextents = addv3(b1.e,b2.e);

+	vec3 b1c = downv4(b1.c);

+	vec3 b2c = downv4(b2.c);

+	vec3 b1min = subv3(b1c,b1.e);

+	vec3 b2min = subv3(b2c,b2.e);

+	vec3 b1max = addv3(b1c,b1.e);

+	vec3 b2max = addv3(b2c,b2.e);

+	if(

+		!(

+			(fabs(b1c.d[0] - b2c.d[0]) <= sumextents.d[0]) &&

+			(fabs(b1c.d[1] - b2c.d[1]) <= sumextents.d[1]) &&

+			(fabs(b1c.d[2] - b2c.d[2]) <= sumextents.d[2])

+		)

+	){

+		return ret;

+	}

+	vec3 axispen[2];

+	axispen[0] = subv3(b1max,b2min);

+	axispen[1] = subv3(b1min,b2max);

+	ret.d[3] = axispen[0].d[0];

+	ret.d[0] = axispen[0].d[0];

+	for(int i = 1; i < 6; i++){

+		if(fabs(axispen[i/3].d[i%3]) < fabs(ret.d[3])){

+			ret = (vec4){

+						.d[0]=0,

+						.d[1]=0,

+						.d[2]=0,

+						.d[3]=(axispen[i/3].d[i%3])

+					};

+			ret.d[i%3] = ret.d[3];

+			ret.d[3] = fabs(ret.d[3]);

+		}

+	}

+	return ret;

+}

+static inline vec3 closestpointAABB( aabb b,  vec3 p){

+	vec3 b1min = subv3(downv4(b.c),b.e);

+	vec3 b1max = addv3(downv4(b.c),b.e);

+	return clampvec3(p,b1min,b1max);

+}

+static inline vec4 spherevaabb( vec4 sph,  aabb box){

+	vec4 ret;

+	vec3 p = closestpointAABB(box,downv4(sph));

+	vec3 v = subv3(p,downv4(sph));

+	f_ d2 = dotv3(v,v);

+	if(d2 <= sph.d[3] * sph.d[3]){

+		f_ len = lengthv3(v);

+		f_ diff = (sph.d[3] - len);

+		if(len > 0){

+			f_ factor = diff/len;

+			vec3 bruh = scalev3(factor, v);

+			ret = upv3(bruh, diff);

+			return ret;

+		} else {

+			aabb virt;

+			virt.c = sph;

+			virt.e.d[0] = sph.d[3];

+			virt.e.d[1] = sph.d[3];

+			virt.e.d[2] = sph.d[3];

+			return boxvbox(virt,box);

+		}

+	}

+	else

+		return (vec4){

+			.d[0]=0,

+			.d[1]=0,

+			.d[2]=0,

+			.d[3]=0

+		};

+}

+//end of chad math impl

+//END Math_Library.h~~~~~~~~~~~~~~~~~~~~

+#endif

--- /dev/null

+++ b/include-demo/api_audio.h

@@ -1,0 +1,97 @@

+/* Public Domain / CC0 Audio Playback Mini Library

+Written by Gek (DMHSW) in 2020

+*/

+/*

+HOW TO BUILD THINGS USING THIS LIBRARY

+#define CHAD_API_IMPL

+//^ This line goes in the file you want the "implementation" in.

+#include "api_audio.h"

+*/

+#define USE_MIXER

+#define USE_MP3

+//#ifdef __TINYC__

+//#define STBI_NO_SIMD

+//#define SDL_DISABLE_IMMINTRIN_H

+//#endif

+#include <stdio.h>

+#include <stdlib.h>

+#include <math.h>

+#define SDL_MAIN_HANDLED

+#include <SDL/SDL.h>

+//NOTE: you might need to change these depending on your project structure.

+#ifdef CHAD_API_IMPL

+#define CHAD_MATH_IMPL

+#endif //

+#include "3dMath.h"

+//#include "../../include/fixedmath.h"

+typedef unsigned char uchar;

+extern uint R_;

+extern uint G_;

+extern uint B_;

+extern uint A_;

+#ifdef USE_MIXER

+#include<SDL/SDL_mixer.h>

+void ainit(int needsSDLINIT);

+void acleanup();

+typedef Mix_Chunk samp;

+typedef Mix_Music track;

+samp* lwav(const char* t);

+track* lmus(const char* t);

+samp* qlwav(Uint8* w);

+int aplay(samp* samp, int loops);

+void aPos(int chan, int angle, unsigned char dist);

+void aHalt(int chan);

+int mplay(track* mus,int loops, int ms);

+void mhalt();

+#ifdef CHAD_API_IMPL

+//Bring up SDL_mixer. Pass non-zero needsSDLINIT to also initialise SDL's audio subsystem first.

+//Terminates the process with a non-zero status if SDL cannot initialise or the audio device cannot be opened.

+void ainit(int needsSDLINIT){

+	if(needsSDLINIT){

+		if (SDL_Init(SDL_INIT_AUDIO)!=0) //We only use SDL for mixer...

+		{

+			fprintf(stderr, "SDL_Init Error: %s\n", SDL_GetError());

+			exit(1); //a failed init must not report success to the parent process

+		}

+	}

+	Mix_Init(MIX_INIT_OGG | MIX_INIT_MP3);

+	if(-1 == Mix_OpenAudio(44100, MIX_DEFAULT_FORMAT, 2, 1024)) {printf("\nAudio can't init :(");exit(2);}

+}

+void acleanup(){

+	Mix_CloseAudio();

+	Mix_Quit();

+	SDL_Quit();

+}

+void mhalt(){Mix_HaltMusic();}

+void aHalt(int chan){Mix_HaltChannel(chan);}

+samp* lwav(const char* t){return Mix_LoadWAV(t);}

+track* lmus(const char* t){return Mix_LoadMUS(t);}

+samp* qlwav(Uint8* w){return Mix_QuickLoad_WAV(w);}

+int aplay(samp* samp, int loops){return Mix_PlayChannel(-1, samp, loops);}

+void aPos(int chan, int angle, unsigned char dist){Mix_SetPosition(chan,angle,dist);}

+int mplay(track* mus,int loops, int ms){return Mix_FadeInMusic(mus,loops,ms);}

+//end of chad api impl

+#endif

+//end of USE_MIXER

+#endif

+#define MAX(x,y) (x>y?x:y)

+#define MIN(x,y) (x<y?x:y)

+#define CHAD_API_NEAR 0.0

--- /dev/null

+++ b/include-demo/chadphys.h

@@ -1,0 +1,106 @@

+#ifndef CHAD_PHYS_H

+#define CHAD_PHYS_H

+#ifdef CHAD_PHYS_IMPL

+#define CHAD_MATH_IMPL

+#endif

+#include "3dMath.h"

+typedef struct {

+	aabb shape; //c.d[3] is sphere radius.

+		//if it's zero or less, it's not a sphere, it's a box

+	f_ mass; //0 means kinematic, or static. Defaults to zero.

+	f_ bounciness; //default 0, put portion of displacement into velocity.

+	f_ airfriction; //default 1, multiplied by velocity every time timestep.

+	f_ friction; //default 0.1

+	vec3 r; //Rotation, Used for rendering only

+	vec3 v; //velocity

+	vec3 a; //Body specific acceleration, combined with gravity

+	void* d; //User defined pointer.

+} phys_body;

+typedef struct{

+	phys_body* abodies; //mass non-zero

+	phys_body* sbodies; //mass zero

+	uint nabodies; //number of abodies

+	uint nsbodies; //number of sbodies

+	vec3 g; //gravity

+	f_ ms; //max speed

+} phys_world;

+void stepPhysWorld(phys_world* world);

+void resolveBodies(phys_body* a, phys_body* b);

+void initPhysWorld(phys_world* world); //inits to NULL

+void initPhysBody(phys_body* body); //inits to defaults specified above.

+#ifdef CHAD_PHYS_IMPL

+//TODO: implement functions

+//Reset a body to its defaults: zeroed shape, mass 0, bounciness 0, friction 0.1, airfriction 1,

+//zero rotation, velocity and acceleration, and a NULL user pointer.

+void initPhysBody(phys_body* body){

+	body->shape = (aabb){

+		.c=(vec4){.d[0] = 0,.d[1] = 0,.d[2] = 0,.d[3] = 0},

+		.e=(vec3){.d[0] = 0,.d[1] = 0,.d[2] = 0}

+	};

+	body->mass = 0;

+	body->bounciness = 0;

+	body->friction = 0.1;

+	body->airfriction = 1.0;

+	body->a = (vec3){.d[0] = 0,.d[1] = 0,.d[2] = 0};

+	body->r = (vec3){.d[0] = 0,.d[1] = 0,.d[2] = 0};

+	body->v = (vec3){.d[0] = 0,.d[1] = 0,.d[2] = 0}; //velocity must start defined; resolveBodies reads it

+	body->d = NULL;

+}

+//Check for and, if necessary, resolve colliding bodies.

+void resolveBodies(phys_body* a, phys_body* b){

+	if(a->mass <= 0 && b->mass <= 0) return; //Perform a preliminary check. Do we even have to do anything?

+	//Check if the two bodies are colliding.

+	vec4 penvec = (vec4){

+		.d[0]=0,

+		.d[1]=0,

+		.d[2]=0,

+		.d[3]=0

+	};

+	if(a->shape.c.d[3] > 0 && b->shape.c.d[3] > 0) //Both Spheres!

+	{

+		penvec = spherevsphere(a->shape.c, b->shape.c);

+	} else if(a->shape.c.d[3] <= 0 && b->shape.c.d[3] <= 0) //Both boxes!

+	{

+		penvec = boxvbox(a->shape,b->shape);

+	} else if (a->shape.c.d[3] > 0 && b->shape.c.d[3] <= 0) //a is a sphere, b is a box

+	{

+		penvec = spherevaabb(a->shape.c,b->shape);

+	} else if (a->shape.c.d[3] <= 0 && b->shape.c.d[3] > 0){ //a is a box, b is a sphere

+		penvec = spherevaabb(b->shape.c,a->shape);

+		penvec.d[0] *= -1;

+		penvec.d[1] *= -1;

+		penvec.d[2] *= -1;

+	} else {

+		puts("\nInvalid configuration. Error.\n");

+	}

+	if(penvec.d[3] <= 0) return; //No penetration detected, or invalid configuration.

+	//We now have the penetration vector. There is a penetration.

+	//determine how much each should be displaced by.

+	//The penvec points INTO A and is of length penvec.d[3]

+	float bdisplacefactor = a->mass / (a->mass + b->mass), adisplacefactor = b->mass / (a->mass + b->mass);

+	if(!(a->mass > 0)) {adisplacefactor = 0; bdisplacefactor = 1;}

+	if(!(b->mass > 0)) {bdisplacefactor = 0; adisplacefactor = 1;}

+	vec3 avel = a->v;

+	vec3 bvel = b->v;

+	vec3 arelvel = subv3(a->v, b->v);

+	vec3 brelvel = subv3(b->v, a->v);

+	if(a->mass > 0){

+		vec4 displacea = scalev4(-adisplacefactor, penvec); //Note: SSE will accelerate a 4-lane multiply better than 3.

+		a->shape.c.d[0] += displacea.d[0];

+		a->shape.c.d[1] += displacea.d[1];

+		a->shape.c.d[2] += displacea.d[2];

+		a->v = addv3(scalev3(1.0-a->friction, arelvel),bvel); //Apply friction!

+		a->v = addv3(a->v, scalev3( a->bounciness, downv4(displacea) ) );

+	}

+	if(b->mass > 0){

+		vec4 displaceb = scalev4(bdisplacefactor, penvec); //The vector returned by collision functions points INTO B!

+		b->shape.c.d[0] += displaceb.d[0];

+		b->shape.c.d[1] += displaceb.d[1];

+		b->shape.c.d[2] += displaceb.d[2];

+		b->v = addv3(scalev3(1.0 - b->friction, brelvel),avel);

+		b->v = addv3(b->v, scalev3( b->bounciness, downv4(displaceb) ) );

+	}

+}

+#endif

+#endif

--- /dev/null

+++ b/include-demo/lockstepthread.h

@@ -1,0 +1,134 @@

+/* Public Domain / CC0 3d Lock-Step Threading Implementation

+Written by Gek (DMHSW) in 2020

+*/

+#ifndef LOCKSTEPTHREAD_H

+#define LOCKSTEPTHREAD_H

+#include <pthread.h>

+#include <stdlib.h>

+typedef struct {

+	pthread_mutex_t myMutex;

+	pthread_barrier_t myBarrier;

+	pthread_t myThread;

+	int isThreadLive;

+	int shouldKillThread;

+	int state;

+	void (*execute)(void*);

+	void* argument;

+} lsthread;

+void init_lsthread(lsthread* t);

+void start_lsthread(lsthread* t);

+void kill_lsthread(lsthread* t);

+void destroy_lsthread(lsthread* t);

+void lock(lsthread* t);

+void step(lsthread* t);

+void* lsthread_func(void* me_void);

+#ifdef LOCKSTEPTHREAD_IMPL

+//function declarations

+//Put an lsthread into its idle state: fresh mutex, barrier for two participants, no live thread, no callback.

+void init_lsthread(lsthread* t){

+	//PTHREAD_MUTEX_INITIALIZER is only specified for static initialisation; a mutex embedded in a struct uses the runtime initialiser.

+	pthread_mutex_init(&t->myMutex, NULL);

+	pthread_barrier_init(&t->myBarrier, NULL, 2);

+	t->isThreadLive = 0;

+	t->shouldKillThread = 0;

+	t->state = 0;

+	t->execute = NULL;

+	t->argument = NULL;

+}

+void destroy_lsthread(lsthread* t){

+	pthread_mutex_destroy(&t->myMutex);

+	pthread_barrier_destroy(&t->myBarrier);

+}

+void lock(lsthread* t){

+	if(t->state == 1)return;//if already locked, nono

+	if(!t->isThreadLive)return;

+	//exit(1)

+	pthread_barrier_wait(&t->myBarrier);

+	//exit(1)

+	if(pthread_mutex_lock(&t->myMutex))

+		exit(1);

+	t->state = 1;

+	//exit(1)

+}

+void step(lsthread* t){

+	if(t->state == -1)return; //if already stepping, nono

+	if(!t->isThreadLive)return;

+	//exit(1)

+	if(pthread_mutex_unlock(&(t->myMutex)))

+		exit(1);

+	//exit(1)

+	pthread_barrier_wait(&t->myBarrier);

+	t->state = -1;

+	//exit(1)

+}

+void kill_lsthread(lsthread* t){

+	if(!t->isThreadLive)return;

+	//exit(1)

+	if(t->state != 1){

+		lock(t);

+		//exit(1)

+	}

+	t->shouldKillThread = 1;

+	step(t);

+	//exit(1)

+	pthread_join(t->myThread,NULL);

+	//if(pthread_kill(t->myThread)){

+	//	exit(1)

+	//}

+	t->isThreadLive = 0;

+	t->shouldKillThread = 0;

+}

+//Thread entry point passed to pthread_create by start_lsthread; me_void is the owning lsthread.

+//Each cycle: wait on the barrier, take the mutex, run execute(argument) unless a kill was requested,

+//release the mutex, then wait on the barrier again.

+void* lsthread_func(void* me_void){

+	lsthread* me = (lsthread*) me_void;

+	if (!me)pthread_exit(NULL);

+	while (1) {

+		pthread_barrier_wait(&me->myBarrier);

+		pthread_mutex_lock(&me->myMutex);

+		if (!(me->shouldKillThread) && me->execute)

+			me->execute(me->argument);

+		else if(me->shouldKillThread){

+			//Kill requested: drop the mutex and leave without the second barrier wait.

+			pthread_mutex_unlock(&me->myMutex);

+			pthread_exit(NULL);

+		}

+		pthread_mutex_unlock(&me->myMutex);

+		pthread_barrier_wait(&me->myBarrier);

+	}

+	//Not reached; the loop only ends through pthread_exit above.

+	pthread_exit(NULL);

+}

+//Launch the worker thread for t. Does nothing if one is already live.

+//On return the calling thread holds the mutex (state 1, LOCKED); step() is what releases it.

+//Exits the process if the mutex cannot be taken or the thread cannot be created.

+void start_lsthread(lsthread* t){

+	if(t->isThreadLive)return;

+	t->isThreadLive = 1;

+	t->shouldKillThread = 0;

+	if(pthread_mutex_lock(&t->myMutex))

+		exit(1);

+	t->state = 1; //LOCKED

+	//Without a worker the next step() would block forever on the barrier, so fail loudly here.

+	if(pthread_create(

+		&t->myThread,

+		NULL,

+		lsthread_func,

+		(void*)t

+	))

+		exit(1);

+}

+#endif

+//end of implementation

+#endif

+//end of header

--- /dev/null

+++ b/include-demo/openimgui.h

@@ -1,0 +1,247 @@

+#include <math.h>

+//PROTOTYPE FOR THE OPENIMGUISTANDARD PROPOSAL

+//Licensed to you under the CC0 license.

+//This is the standard for an intuitive immediate-mode gui specification which gracefully solves many of the shortcomings of

+//other immediate mode gui standards.

+//1) How elements are drawn across different environments

+//2) How keyboard/gamepad cursor navigation is handled

+//3) How the same GUI rendering code can be transported between backends.

+//This is a standard for immediate mode GUI elements which can be implemented anywhere and gracefully decreases in feature level based on platform.

+//If your target platform can render text and it can render boxes, then it can run openimgui.

+// The screen's top left corner is 0,0 and bottom right is 1,1

+// All coordinates and dimensions are specified relative to that.

+//HOW CURSOR BUTTON IS HANDLED~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

+//Beginning of your frame...

+//omg_cb = 0;

+//if(just_touched || just_mouseleftbuttondown || just_button_down) omg_cb = 1; //Pressed!

+//if(just_released_touch || just_mouseleftbuttonup || just_button_up) omg_cb = 2; //Released!

+//Gui code this frame...

+//HOW CURSOR POSITION IS HANDLED:

+// On platforms with touch or mouse input, the polling of cursor position will occur like this~~~~~~~~~~~~~

+// omg_cursor_has_been_sucked = 0;

+// omg_cursorpos[0] = device_cursorpos.x / (float) screenWidth;

+// omg_cursorpos[1] = device_cursorpos.y / (float) screenHeight;

+// Clamp the cursorpos (if necessary)

+// omg_cursorpos[0] = omg_clampf(omg_cursorpos[0]);

+// omg_cursorpos[1] = omg_clampf(omg_cursorpos[1]);

+// omg_cursorpos_presuck[0] = -1;

+// omg_cursorpos_presuck[1] = -1;

+// On platforms which use buttons to navigate menu elements...~~~~~~~~~~~~~

+// omg_cursor_has_been_sucked = 0;

+// if(buttonleft) omg_cursorpos[0] -= omg_buttonjump[0];

+// if(buttonright) omg_cursorpos[0] += omg_buttonjump[0];

+// if(buttonup) omg_cursorpos[1] -= omg_buttonjump[1];

+// if(buttondown) omg_cursorpos[1] += omg_buttonjump[1];

+// Wrap the cursorpos around the screen edges

+// omg_cursorpos[0] = omg_wrapf(omg_cursorpos[0]);

+// omg_cursorpos[1] = omg_wrapf(omg_cursorpos[1]);

+// omg_cursorpos_presuck[0] = omg_cursorpos[0];

+// omg_cursorpos_presuck[1] = omg_cursorpos[1];

+// HOW BUTTON SUCKING WORKS ~~~~~~~~~~~~~~

+// On platforms without cursor input such as game consoles, there needs to be an ergonomic way to navigate menus.

+// This is achieved by simulating a virtual mouse cursor in the game and "Sucking" it into the closest sucking box.

+// We keep track of the cursorposition every frame as well as the position before an attempt to "suck" it has been made.

+// This allows us to determine (By testing, for every graphical object) whether or not the cursorposition should be "sucked" into

+// the graphical object.

+// Normalized cursor position

+#ifndef OPENIMGUI_IMPL

+extern float omg_cursorpos[2]; //Defaults to zero

+extern float omg_cursorpos_presuck[2]; //Defaults to zero

+extern int omg_cursor_has_been_sucked;

+extern int omg_cursor_was_inside;  //Set

+extern float omg_buttonjump[2]; //Defaults to zero

+// Setting for users using

+extern int bstate_old;

+extern int udlr_old[4];

+// cursor button

+extern int omg_cb; //Set to zero every iteration.

+#else

+float omg_cursorpos[2]; //Defaults to zero

+float omg_cursorpos_presuck[2]; //Defaults to zero

+int omg_cursor_has_been_sucked;

+int omg_cursor_was_inside;  //Set

+float omg_buttonjump[2]; //Defaults to zero

+// Setting for users using

+int bstate_old = 0;

+int udlr_old[4] = {0,0,0,0};

+// cursor button

+int omg_cb; //Set to zero every iteration.

+#endif

+//Used for determining the closest button in sucking mode.

+static inline float omg_sqrlinelength(float x1, float y1, float x2, float y2){

+	return ((x1-x2) * (x1-x2) + (y1-y2) * (y1-y2));

+}

+//Used for clamping cursor position to the screen.

+static inline float omg_clampf(float x){

+	return (x>1.0)?1.0: (x<0.0)?0.0:x;

+}

+//Used for wrapping the cursor position to the screen in button cursor mode.

+static inline float omg_wrapf(float x){

+	float f = fmod(x, 1);

+	if(f<0.0) (f = 1.0 + f);

+	return f;

+}

+//Per-frame update for button-driven cursors.

+//_up/_down/_left/_right and bstate are this frame's raw button levels; they are compared with the

+//values remembered from the previous call so that only transitions move the cursor or set omg_cb.

+static inline void omg_update_keycursor(int _up, int _down, int _left, int _right, int bstate){

+	omg_cursor_was_inside = 0;

+	omg_cursor_has_been_sucked = 0;

+	//A direction only fires on the call where it goes from released to pressed.

+	int up = _up && ! udlr_old[0];

+	int down = _down && ! udlr_old[1];

+	int left = _left && ! udlr_old[2];

+	int right = _right && ! udlr_old[3];

+	udlr_old[0] = _up;

+	udlr_old[1] = _down;

+	udlr_old[2] = _left;

+	udlr_old[3] = _right;

+	if(up)   omg_cursorpos[1] -= omg_buttonjump[1];

+	if(down) omg_cursorpos[1] += omg_buttonjump[1];

+	if(left) omg_cursorpos[0] -= omg_buttonjump[0];

+	if(right)omg_cursorpos[0] += omg_buttonjump[0];

+	//Wrap the cursorpos around the screen edges

+	omg_cursorpos[0] = omg_wrapf(omg_cursorpos[0]);

+	omg_cursorpos[1] = omg_wrapf(omg_cursorpos[1]);

+	//Remember the moved, wrapped position as the pre-suck reference for this frame.

+	omg_cursorpos_presuck[0] = omg_cursorpos[0];

+	omg_cursorpos_presuck[1] = omg_cursorpos[1];

+	omg_cb = 0;

+	if(bstate && !bstate_old) omg_cb = 1; //pressed on this call

+	else if (!bstate && bstate_old) omg_cb = 2; //released on this call

+	bstate_old = bstate;

+}

+//for mouse cursors and touch input.

+static inline void omg_update_mcursor(float ncx, float ncy, int bstate){

+	omg_cursor_has_been_sucked = 0;

+	omg_cursor_was_inside = 0;

+	omg_cursorpos[0] = ncx;

+	omg_cursorpos[1] = ncy;

+	// Clamp the cursorpos (if necessary)

+	omg_cursorpos[0] = omg_clampf(omg_cursorpos[0]);

+	omg_cursorpos[1] = omg_clampf(omg_cursorpos[1]);

+	omg_cursorpos_presuck[0] = -1;

+	omg_cursorpos_presuck[1] = -1;

+	omg_cb = 0;

+	if(bstate && !bstate_old) omg_cb = 1;

+	else if (!bstate && bstate_old) omg_cb = 2;

+	bstate_old = bstate;

+}

+static inline int omg_boxtest(float x, float y, float xdim, float ydim, float cx, float cy){ //Returns 1 if point (cx, cy) lies inside the box with corner (x, y) and size (xdim, ydim), else 0.

+	if((x <= cx) && //All four comparisons are inclusive, so points exactly on an edge count as inside.

+			(x+xdim >= cx) &&

+			(y <= cy) &&

+			(y+ydim >= cy))

+		return 1;

+	return 0;

+}

+static inline int omg_box_retval(float x, float y, float xdim, float ydim){ //Box hit test against whichever cursor position is authoritative for this call.

+	if(omg_cursorpos_presuck[0] == -1) //Sucking disabled (presuck is -1): test the live cursor position.

+		return omg_boxtest(x,y,xdim,ydim,	omg_cursorpos[0],omg_cursorpos[1]);

+	return omg_boxtest(x,y,xdim,ydim,		omg_cursorpos_presuck[0],omg_cursorpos_presuck[1]); //Sucking enabled: test the position from before any box moved the cursor.

+}

+static inline void omg_box_suck(float x, float y, float xdim, float ydim, int sucks, float buttonjumpx, float buttonjumpy){ //Lets this box compete to pull ("suck") the cursor to its center; the winner also sets omg_buttonjump.

+	 if(omg_cursorpos_presuck[0] != -1 && sucks){ //Do not attempt to suck if this graphical element does not suck or sucking is not enabled.

+		int btest = omg_boxtest(x,y,xdim,ydim, omg_cursorpos_presuck[0], omg_cursorpos_presuck[1]); //Does this box contain the pre-suck cursor position?

+		 if(!omg_cursor_has_been_sucked){

+		 	//We are free to try to suck up the cursor without a check.

+			omg_cursorpos[0] = x + xdim/2.0;

+			omg_cursorpos[1] = y + ydim/2.0;

+			omg_cursor_has_been_sucked = 1;

+		  	omg_buttonjump[0] = buttonjumpx;

+		  	omg_buttonjump[1] = buttonjumpy;

+		  	if(btest) omg_cursor_was_inside = 1; //Record that the box now holding the cursor contained the pre-suck position.

+		  	//puts("Initial grab...\n");

+		  	//printf("Cx = %f, Cy = %f\n", omg_cursorpos[0], omg_cursorpos[1]);

+		} else if (

+		(!omg_cursor_was_inside && //Case 1: the current pick did not contain the pre-suck cursor, and this box's center is nearer to it.

+		omg_sqrlinelength(x+xdim/2.0, y+ydim/2.0, 			omg_cursorpos_presuck[0],  omg_cursorpos_presuck[1]) < //omg_sqrlinelength is defined outside this section; presumably a squared distance - confirm.

+		           omg_sqrlinelength(omg_cursorpos[0], omg_cursorpos[1], omg_cursorpos_presuck[0],  omg_cursorpos_presuck[1])

+		 ) || //Case 2: the current pick did not contain the pre-suck cursor, but this box does.

+		 (!omg_cursor_was_inside && btest) ||

+		  (

+		  	btest && //Case 3: this box contains the pre-suck cursor and its center is nearer than the current pick.

+			omg_sqrlinelength(x+xdim/2.0, y+ydim/2.0, 			omg_cursorpos_presuck[0],  omg_cursorpos_presuck[1]) <

+		    omg_sqrlinelength(omg_cursorpos[0], omg_cursorpos[1], omg_cursorpos_presuck[0],  omg_cursorpos_presuck[1])

+		  )

+		           ){

+		           //This box wins over the current suck position.

+			omg_cursorpos[0] = x+xdim/2.0;

+			omg_cursorpos[1] = y+ydim/2.0;

+			omg_cursor_has_been_sucked = 1;

+		  omg_buttonjump[0] = buttonjumpx;

+		  omg_buttonjump[1] = buttonjumpy;

+		  //if(boxtest(x,y,xdim,ydim)) omg_cursor_was_inside = 1;

+		  omg_cursor_was_inside = omg_boxtest(x,y,xdim,ydim, omg_cursorpos_presuck[0], omg_cursorpos_presuck[1]); //Same test as btest above; may reset the flag to 0.

+		  //puts("Found a different button!\n");

+		  //printf("Cx = %f, Cy = %f\n", omg_cursorpos[0], omg_cursorpos[1]);

+		}

+	}

+}

+// OMG_BOX:

+// Draws a box on the screen.

+// Returns whether or not the cursor was inside it this frame (NOT IF IT GOT __SUCKED__ INSIDE IT!)

+// x,y are the top left corner.

+// xdim, ydim, are the width and height of the box.

+// hints is a set of implementation-specific parameters describing the nature of how the box is drawn,

+// sucks indicates whether or not the cursor position is "sucked" into the button (See: HOW BUTTON SUCKING WORKS)

+// buttonjumpx and buttonjumpy are the amount by which the cursor will jump in X and Y when pressing the menu navigation arrows.

+// The return value is determined like this:

+// if(omg_cursorpos_presuck[0] == -1) return omg_boxtest(omg_cursorpos) else

+//	return boxtest(omg_cursorpos_presuck)

+// The suck test works like this:

+// if(omg_cursorpos_presuck[0] != -1 && sucks){ //Do not attempt to suck if this graphical element does not suck or sucking is not enabled.

+// if(!omg_cursor_has_been_sucked){ //We are free to try to suck up the cursor without a check.

+//	omg_cursorpos[0] = x+xdim/2.0;

+//	omg_cursorpos[1] = y+ydim/2.0;

+//	omg_cursor_has_been_sucked = 1;

+//  omg_buttonjump[0] = buttonjumpx;

+//  omg_buttonjump[1] = buttonjumpy;

+//} else if (omg_sqrlinelength(x+xdim/2.0, y+ydim/2.0, omg_cursorpos_presuck[0],  omg_cursorpos_presuck[1]) <

+//           omg_sqrlinelength(omg_cursorpos[0], omg_cursorpos[1], omg_cursorpos_presuck[0],  omg_cursorpos_presuck[1])){ //The box is closer than the current suck position.

+//	omg_cursorpos[0] = x+xdim/2.0;

+//	omg_cursorpos[1] = y+ydim/2.0;

+//	omg_cursor_has_been_sucked = 1;

+//  omg_buttonjump[0] = buttonjumpx;

+//  omg_buttonjump[1] = buttonjumpy;

+//}}

+//When sucking is enabled (omg_cursorpos_presuck[0] != -1) the box test will be performed on cursorpos_presuck.

+//You can use the above static inline functions as a reference for your implementation.

+int omg_box(float x, float y, float xdim, float ydim, int sucks, float buttonjumpx, float buttonjumpy, int hints);

+// OMG_TEXTBOX:

+// Draws a box... with text in it

+// All the args are the same, and its return value is the same, except now it can draw text.

+// It should handle all the same hints as omg_box.

+// The hintstext variable is a set of implementation-specific parameters describing how the text is drawn.

+// The textsize is an implementation-specific indication of how large the text in the box should be.

+// The x and y dimensions of the box are automatically deduced from text.

+// Text containing newlines will extend the Y dimension of the box,

+// and the longest line of text will determine the x dimension of the box.

+// Otherwise, it is functionally identical to omg_box.

+int omg_textbox(float x, float y, const char* text, int textsize, int sucks, float buttonjumpx, float buttonjumpy, int hints, int hintstext);

--- /dev/null

+++ b/include-demo/resweep.h

@@ -1,0 +1,307 @@

+//unlicense'd

+/*

+This is free and unencumbered software released into the public domain.

+Anyone is free to copy, modify, publish, use, compile, sell, or

+distribute this software, either in source code form or as a compiled

+binary, for any purpose, commercial or non-commercial, and by any

+means.

+In jurisdictions that recognize copyright laws, the author or authors

+of this software dedicate any and all copyright interest in the

+software to the public domain. We make this dedication for the benefit

+of the public at large and to the detriment of our heirs and

+successors. We intend this dedication to be an overt act of

+relinquishment in perpetuity of all present and future rights to this

+software under copyright law.

+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,

+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.

+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR

+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,

+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR

+OTHER DEALINGS IN THE SOFTWARE.

+For more information, please refer to <http://unlicense.org>

+*/

+#pragma once

+#ifdef __cplusplus

+extern "C" {

+#endif

+/******************************************************************************/

+/******************************************************************************/

+#ifdef __cplusplus

+}

+#endif

+#ifdef RESWEEP_IMPLEMENTATION

+#include <math.h>

+#include <string.h>

+#ifndef M_PI

+#define M_PI   3.14159265358979323846

+#endif

+#ifndef M_1_PI

+#define	M_1_PI 0.31830988618379067154

+#endif

+#define SIDELOBE_HEIGHT 96 // Used as sidelobeHeight when sinc_resample derives the Kaiser beta and the window size

+#define UP_TRANSITION_WIDTH (1.0 / 32.0) // transitionWidth used when outFreq > inFreq

+#define DOWN_TRANSITION_WIDTH (1.0 / 128.0) // transitionWidth used otherwise

+#define MAX_SINC_WINDOW_SIZE 2048 // Upper bound applied to windowSize; also sizes dynamicLut

+#define RESAMPLE_LUT_STEP 128 // Number of rows in dynamicLut, one per sub-sample offset step

+typedef struct // One filter tap of the resampling lookup table

+{

+	float value; // Normalised, windowed sinc coefficient for this tap

+	float delta; // Difference to the same tap in the next LUT row, used for linear interpolation between rows

+}

+lutEntry_t;

+lutEntry_t dynamicLut[RESAMPLE_LUT_STEP * MAX_SINC_WINDOW_SIZE]; // Rebuilt by sinc_resample_createLut; rows are windowSize entries apart

+static inline unsigned int calc_gcd(unsigned int a, unsigned int b) // Greatest common divisor via the Euclidean algorithm

+{

+	while (b)

+	{

+		unsigned int t = b;

+		b = a % b;

+		a = t;

+	}

+	return a; // Equals the other argument when one is 0; 0 when both are 0

+}

+static inline double exact_nsinc(double x) // Normalised sinc: sin(pi*x) / (pi*x)

+{

+	if (x == 0.0) // Avoid 0/0; the limit at 0 is 1

+		return 1.0;

+	return ((double)(M_1_PI) / x) * sin(M_PI * x);

+}

+// Modified Bessel function of the first kind, order 0, evaluated as a truncated power series in x*x

+// https://ccrma.stanford.edu/~jos/sasp/Kaiser_Window.html

+static inline double I0(double x)

+{

+	double r = 1.0, xx = x * x, xpow = xx, coeff = 0.25; // r starts at the k=0 term; coeff is 1/4 for the k=1 term

+	int k;

+	// iterations until coeff ~= 0

+	// 19 for float32, 89 for float64, 880 for float80

+	for (k = 1; k < 89; k++)

+	{

+		r += xpow * coeff;

+		coeff /= (4 * k + 8) * k + 4; // (4k+8)k+4 == 4*(k+1)^2, giving the next term's coefficient

+		xpow *= xx;

+	}

+	return r;

+}

+// Kaiser window value for sample n of a window with 'length' samples and shape parameter beta; see https://ccrma.stanford.edu/~jos/sasp/Kaiser_Window.html

+static inline double kaiser(int n, int length, double beta)

+{

+	double mid = 2 * n / (double)(length - 1) - 1.0; // Maps n = 0..length-1 onto -1..1; NOTE(review): divides by zero when length == 1

+	return I0(beta * sqrt(1.0 - mid * mid)) / I0(beta);

+}

+static inline void sinc_resample_createLut(int inFreq, int cutoffFreq2, int windowSize, double beta) // Fills dynamicLut with RESAMPLE_LUT_STEP rows of windowSize filter taps

+{

+	double windowLut[windowSize]; // Kaiser window, one value per tap (variable-length array on the stack)

+	double freqAdjust = (double)cutoffFreq2 / (double)inFreq; // Scales the sinc argument by the cutoff-to-input rate ratio

+	lutEntry_t *out, *in;

+	int i, j;

+	for (i = 0; i < windowSize; i++)

+		windowLut[i] = kaiser(i, windowSize, beta);

+	out = dynamicLut;

+	for (i = 0; i < RESAMPLE_LUT_STEP; i++) // Pass 1: one row of taps per sub-sample offset

+	{

+		double offset = i / (double)(RESAMPLE_LUT_STEP - 1) - windowSize / 2; // Fraction in [0, 1] minus half the window (windowSize / 2 is integer division)

+		double sum = 0.0;

+		for (j = 0; j < windowSize; j++)

+		{

+			double s = exact_nsinc((j + offset) * freqAdjust);

+			out->value = s * windowLut[j];

+			sum += s; // NOTE(review): accumulates the unwindowed sinc, not the windowed tap stored above - confirm this is intended

+			out++;

+		}

+		out -= windowSize; // Rewind to the start of this row to normalise it

+		for (j = 0; j < windowSize; j++)

+		{

+			out->value /= sum;

+			out++;

+		}

+	}

+	out = dynamicLut;

+	in = out + windowSize; // 'in' walks one row ahead of 'out'

+	for (i = 0; i < RESAMPLE_LUT_STEP - 1; i++) // Pass 2: per-tap difference to the next row

+	{

+		for (j = 0; j < windowSize; j++)

+		{

+			out->delta = in->value - out->value;

+			out++;

+			in++;

+		}

+	}

+	for (j = 0; j < windowSize; j++) // The last row has no successor, so its delta is 0

+	{

+		out->delta = 0;

+		out++;

+	}

+}

+static inline void sinc_resample_internal(short *wavOut, int sizeOut, int outFreq, const short *wavIn, int sizeIn, int inFreq, int cutoffFreq2, int numChannels, int windowSize, double beta) // Resamples interleaved 16-bit samples from wavIn into wavOut with a windowed-sinc filter

+{

+	float y[windowSize * numChannels]; // Ring buffer holding the last windowSize input frames, channels interleaved

+	const short *sampleIn, *wavInEnd = wavIn + (sizeIn / 2); // sizeIn is halved to count shorts, so it appears to be a byte count (assumes 2-byte short)

+	short *sampleOut, *wavOutEnd = wavOut + (sizeOut / 2); // Same for sizeOut

+	float outPeriod;

+	int subpos = 0; // Position between input samples, counted in steps of 1/outFreq (after gcd reduction)

+	int gcd = calc_gcd(inFreq, outFreq);

+	int i, c, next; // next = ring-buffer slot that receives the next input frame, i.e. the oldest frame

+	float dither[numChannels]; // Per-channel rounding error carried over to the following output sample

+	sinc_resample_createLut(inFreq, cutoffFreq2, windowSize, beta); // Uses the unreduced inFreq

+	inFreq /= gcd; // Reduce the rate ratio; divides by zero if both rates are 0

+	outFreq /= gcd;

+	outPeriod = 1.0f / outFreq;

+	for (c = 0; c < numChannels; c++)

+		dither[c] = 0.0f;

+	for (i = 0; i < windowSize / 2 - 1; i++) // The leading part of the window starts out as silence

+	{

+		for (c = 0; c < numChannels; c++)

+			y[i * numChannels + c] = 0;

+	}

+	sampleIn = wavIn;

+	for (; i < windowSize; i++) // The rest is pre-filled from the input, with zeros once the input runs out

+	{

+		for (c = 0; c < numChannels; c++)

+			y[i * numChannels + c] = (sampleIn < wavInEnd) ? *sampleIn++ : 0;

+	}

+	sampleOut = wavOut;

+	next = 0;

+	while (sampleOut < wavOutEnd) // One iteration per output frame

+	{

+		float samples[numChannels];

+		float offset = 1.0f - subpos * outPeriod; // In (0, 1] because subpos stays below outFreq

+		float interp;

+		lutEntry_t *lutPart;

+		int index;

+		for (c = 0; c < numChannels; c++)

+			samples[c] = 0.0f;

+		interp = offset * (RESAMPLE_LUT_STEP - 1);

+		index = interp; // Truncates to the LUT row

+		interp -= index; // Fractional remainder used to blend towards the next row

+		lutPart = dynamicLut + index * windowSize;

+		for (i = next; i < windowSize; i++, lutPart++) // Walk the ring buffer oldest-first: from next to the end...

+		{

+			float scale = lutPart->value + lutPart->delta * interp;

+			for (c = 0; c < numChannels; c++)

+				samples[c] += y[i * numChannels + c] * scale;

+		}

+		for (i = 0; i < next; i++, lutPart++) // ...then wrap around to the start

+		{

+			float scale = lutPart->value + lutPart->delta * interp;

+			for (c = 0; c < numChannels; c++)

+				samples[c] += y[i * numChannels + c] * scale;

+		}

+		for (c = 0; c < numChannels; c++)

+		{

+			float r = roundf(samples[c] + dither[c]); // Round with the carried-over error added in

+			dither[c] += samples[c] - r; // Carry the new rounding error forward

+			if (r > 32767) // Saturate to the 16-bit sample range

+				*sampleOut++ = 32767;

+			else if (r < -32768)

+				*sampleOut++ = -32768;

+			else

+				*sampleOut++ = r;

+		}

+		subpos += inFreq; // Advance by one output sample

+		while (subpos >= outFreq) // Pull in one input frame for every whole input sample crossed

+		{

+			subpos -= outFreq;

+			for (c = 0; c < numChannels; c++)

+				y[next * numChannels + c] = (sampleIn < wavInEnd) ? *sampleIn++ : 0;

+			next = (next + 1) % windowSize;

+		}

+	}

+}

+// Resamples interleaved 16-bit PCM from inFreq to outFreq using a Kaiser-windowed sinc filter.

+// sizeOut and sizeIn are buffer sizes in bytes (the internal routine halves them to count shorts).

+// The output buffer is always filled: once the input runs out, the remainder is silence.

+// A NULL input or a non-positive size, rate or channel count yields a silent output instead of

+// reaching a division by zero or a zero-length array in the internal routine.

+void sinc_resample(short *wavOut, int sizeOut, int outFreq, const short *wavIn, int sizeIn, int inFreq, int numChannels)

+{

+	double sidelobeHeight = SIDELOBE_HEIGHT;

+	double transitionWidth;

+	double beta = 0.0;

+	int cutoffFreq2;

+	int windowSize;

+	// Nothing to write into

+	if (!wavOut || sizeOut <= 0)

+		return;

+	// Unusable input or parameters: emit silence rather than invoking undefined behaviour

+	if (!wavIn || sizeIn <= 0 || inFreq <= 0 || outFreq <= 0 || numChannels <= 0)

+	{

+		memset(wavOut, 0, (size_t)sizeOut);

+		return;

+	}

+	// Just copy if no resampling necessary

+	if (outFreq == inFreq)

+	{

+		size_t copySize = (size_t)((sizeOut < sizeIn) ? sizeOut : sizeIn);

+		memcpy(wavOut, wavIn, copySize);

+		// Pad with silence so the tail of the output is never left uninitialized,

+		// matching the resampling path which feeds zeros once the input is exhausted

+		memset((char *)wavOut + copySize, 0, (size_t)sizeOut - copySize);

+		return;

+	}

+	transitionWidth = (outFreq > inFreq) ? UP_TRANSITION_WIDTH : DOWN_TRANSITION_WIDTH;

+	// cutoff freq is ideally half transition width away from output freq

+	cutoffFreq2 = outFreq - transitionWidth * inFreq * 0.5;

+	// FIXME: Figure out why there are bad effects with cutoffFreq2 > inFreq

+	if (cutoffFreq2 > inFreq)

+		cutoffFreq2 = inFreq;

+	// https://www.mathworks.com/help/signal/ug/kaiser-window.html

+	if (sidelobeHeight > 50)

+		beta = 0.1102 * (sidelobeHeight - 8.7);

+	else if (sidelobeHeight >= 21)

+		beta = 0.5842 * pow(sidelobeHeight - 21.0, 0.4) + 0.07886 * (sidelobeHeight - 21.0);

+	windowSize = (sidelobeHeight - 8.0) / (2.285 * transitionWidth * M_PI) + 1;

+	// dynamicLut only has room for MAX_SINC_WINDOW_SIZE taps per row

+	if (windowSize > MAX_SINC_WINDOW_SIZE)

+		windowSize = MAX_SINC_WINDOW_SIZE;

+	// should compile as different paths

+	// number of channels need to be compiled as separate paths to ensure good

+	// vectorization by the compiler

+	if (numChannels == 1)

+		sinc_resample_internal(wavOut, sizeOut, outFreq, wavIn, sizeIn, inFreq, cutoffFreq2, 1, windowSize, beta);

+	else if (numChannels == 2)

+		sinc_resample_internal(wavOut, sizeOut, outFreq, wavIn, sizeIn, inFreq, cutoffFreq2, 2, windowSize, beta);

+	else

+		sinc_resample_internal(wavOut, sizeOut, outFreq, wavIn, sizeIn, inFreq, cutoffFreq2, numChannels, windowSize, beta);

+}

+#endif // RESWEEP_IMPLEMENTATION

--- /dev/null

+++ b/include-demo/stb_ds.h

@@ -1,0 +1,1880 @@

+/* stb_ds.h - v0.65 - public domain data structures - Sean Barrett 2019

+   This is a single-header-file library that provides easy-to-use

+   dynamic arrays and hash tables for C (also works in C++).

+   For a gentle introduction:

+      http://nothings.org/stb_ds

+   To use this library, do this in *one* C or C++ file:

+      #define STB_DS_IMPLEMENTATION

+      #include "stb_ds.h"

+TABLE OF CONTENTS

+  Table of Contents

+  Compile-time options

+  License

+  Documentation

+  Notes

+  Notes - Dynamic arrays

+  Notes - Hash maps

+  Credits

+COMPILE-TIME OPTIONS

+  #define STBDS_NO_SHORT_NAMES

+     This flag needs to be set globally.

+     By default stb_ds exposes shorter function names that are not qualified

+     with the "stbds_" prefix. If these names conflict with the names in your

+     code, define this flag.

+  #define STBDS_SIPHASH_2_4

+     This flag only needs to be set in the file containing #define STB_DS_IMPLEMENTATION.

+     By default stb_ds.h hashes using a weaker variant of SipHash and a custom hash for

+     4- and 8-byte keys. On 64-bit platforms, you can define the above flag to force

+     stb_ds.h to use specification-compliant SipHash-2-4 for all keys. Doing so makes

+     hash table insertion about 20% slower on 4- and 8-byte keys, 5% slower on

+     64-byte keys, and 10% slower on 256-byte keys on my test computer.

+  #define STBDS_REALLOC(context,ptr,size) better_realloc

+  #define STBDS_FREE(context,ptr)         better_free

+     These defines only need to be set in the file containing #define STB_DS_IMPLEMENTATION.

+     By default stb_ds uses stdlib realloc() and free() for memory management. You can

+     substitute your own functions instead by defining these symbols. You must either

+     define both, or neither. Note that at the moment, 'context' will always be NULL.

+     @TODO add an array/hash initialization function that takes a memory context pointer.

+  #define STBDS_UNIT_TESTS

+     Defines a function stbds_unit_tests() that checks the functioning of the data structures.

+  Note that on older versions of gcc (e.g. 5.x.x) you may need to build with '-std=c++0x'

+     (or equivalently '-std=c++11') when using anonymous structures as seen on the web

+     page or in STBDS_UNIT_TESTS.

+LICENSE

+  Placed in the public domain and also MIT licensed.

+  See end of file for detailed license information.

+DOCUMENTATION

+  Dynamic Arrays

+    Non-function interface:

+      Declare an empty dynamic array of type T

+        T* foo = NULL;

+      Access the i'th item of a dynamic array 'foo' of type T, T* foo:

+        foo[i]

+    Functions (actually macros)

+      arrfree:

+        void arrfree(T*);

+          Frees the array.

+      arrlen:

+        ptrdiff_t arrlen(T*);

+          Returns the number of elements in the array.

+      arrlenu:

+        size_t arrlenu(T*);

+          Returns the number of elements in the array as an unsigned type.

+      arrpop:

+        T arrpop(T* a)

+          Removes the final element of the array and returns it.

+      arrput:

+        T arrput(T* a, T b);

+          Appends the item b to the end of array a. Returns b.

+      arrins:

+        T arrins(T* a, int p, T b);

+          Inserts the item b into the middle of array a, into a[p],

+          moving the rest of the array over. Returns b.

+      arrinsn:

+        void arrinsn(T* a, int p, int n);

+          Inserts n uninitialized items into array a starting at a[p],

+          moving the rest of the array over.

+      arraddnptr:

+        T* arraddnptr(T* a, int n)

+          Appends n uninitialized items onto array at the end.

+          Returns a pointer to the first uninitialized item added.

+      arraddnindex:

+        size_t arraddnindex(T* a, int n)

+          Appends n uninitialized items onto array at the end.

+          Returns the index of the first uninitialized item added.

+      arrdel:

+        void arrdel(T* a, int p);

+          Deletes the element at a[p], moving the rest of the array over.

+      arrdeln:

+        void arrdeln(T* a, int p, int n);

+          Deletes n elements starting at a[p], moving the rest of the array over.

+      arrdelswap:

+        void arrdelswap(T* a, int p);

+          Deletes the element at a[p], replacing it with the element from

+          the end of the array. O(1) performance.

+      arrsetlen:

+        void arrsetlen(T* a, int n);

+          Changes the length of the array to n. Allocates uninitialized

+          slots at the end if necessary.

+      arrsetcap:

+        size_t arrsetcap(T* a, int n);

+          Sets the length of allocated storage to at least n. It will not

+          change the length of the array.

+      arrcap:

+        size_t arrcap(T* a);

+          Returns the number of total elements the array can contain without

+          needing to be reallocated.

+  Hash maps & String hash maps

+    Given T is a structure type: struct { TK key; TV value; }. Note that some

+    functions do not require TV value and can have other fields. For string

+    hash maps, TK must be 'char *'.

+    Special interface:

+      stbds_rand_seed:

+        void stbds_rand_seed(size_t seed);

+          For security against adversarially chosen data, you should seed the

+          library with a strong random number. Or at least seed it with time().

+      stbds_hash_string:

+        size_t stbds_hash_string(char *str, size_t seed);

+          Returns a hash value for a string.

+      stbds_hash_bytes:

+        size_t stbds_hash_bytes(void *p, size_t len, size_t seed);

+          These functions hash an arbitrary number of bytes. The function

+          uses a custom hash for 4- and 8-byte data, and a weakened version

+          of SipHash for everything else. On 64-bit platforms you can get

+          specification-compliant SipHash-2-4 on all data by defining

+          STBDS_SIPHASH_2_4, at a significant cost in speed.

+    Non-function interface:

+      Declare an empty hash map of type T

+        T* foo = NULL;

+      Access the i'th entry in a hash table T* foo:

+        foo[i]

+    Function interface (actually macros):

+      hmfree

+      shfree

+        void hmfree(T*);

+        void shfree(T*);

+          Frees the hashmap and sets the pointer to NULL.

+      hmlen

+      shlen

+        ptrdiff_t hmlen(T*)

+        ptrdiff_t shlen(T*)

+          Returns the number of elements in the hashmap.

+      hmlenu

+      shlenu

+        size_t hmlenu(T*)

+        size_t shlenu(T*)

+          Returns the number of elements in the hashmap.

+      hmgeti

+      shgeti

+      hmgeti_ts

+        ptrdiff_t hmgeti(T*, TK key)

+        ptrdiff_t shgeti(T*, char* key)

+        ptrdiff_t hmgeti_ts(T*, TK key, ptrdiff_t tempvar)

+          Returns the index in the hashmap which has the key 'key', or -1

+          if the key is not present.

+      hmget

+      hmget_ts

+      shget

+        TV hmget(T*, TK key)

+        TV shget(T*, char* key)

+        TV hmget_ts(T*, TK key, ptrdiff_t tempvar)

+          Returns the value corresponding to 'key' in the hashmap.

+          The structure must have a 'value' field

+      hmgets

+      shgets

+        T hmgets(T*, TK key)

+        T shgets(T*, char* key)

+          Returns the structure corresponding to 'key' in the hashmap.

+      hmgetp

+      shgetp

+      hmgetp_ts

+      hmgetp_null

+      shgetp_null

+        T* hmgetp(T*, TK key)

+        T* shgetp(T*, char* key)

+        T* hmgetp_ts(T*, TK key, ptrdiff_t tempvar)

+        T* hmgetp_null(T*, TK key)

+        T* shgetp_null(T*, char *key)

+          Returns a pointer to the structure corresponding to 'key' in

+          the hashmap. Functions ending in "_null" return NULL if the key

+          is not present in the hashmap; the others return a pointer to a

+          structure holding the default value (but not the searched-for key).

+      hmdefault

+      shdefault

+        TV hmdefault(T*, TV value)

+        TV shdefault(T*, TV value)

+          Sets the default value for the hashmap, the value which will be

+          returned by hmget/shget if the key is not present.

+      hmdefaults

+      shdefaults

+        TV hmdefaults(T*, T item)

+        TV shdefaults(T*, T item)

+          Sets the default struct for the hashmap, the contents which will be

+          returned by hmgets/shgets if the key is not present.

+      hmput

+      shput

+        TV hmput(T*, TK key, TV value)

+        TV shput(T*, char* key, TV value)

+          Inserts a <key,value> pair into the hashmap. If the key is already

+          present in the hashmap, updates its value.

+      hmputs

+      shputs

+        T hmputs(T*, T item)

+        T shputs(T*, T item)

+          Inserts a struct with T.key into the hashmap. If the struct is already

+          present in the hashmap, updates it.

+      hmdel

+      shdel

+        int hmdel(T*, TK key)

+        int shdel(T*, char* key)

+          If 'key' is in the hashmap, deletes its entry and returns 1.

+          Otherwise returns 0.

+    Function interface (actually macros) for strings only:

+      sh_new_strdup

+        void sh_new_strdup(T*);

+          Overwrites the existing pointer with a newly allocated

+          string hashmap which will automatically allocate and free

+          each string key using realloc/free

+      sh_new_arena

+        void sh_new_arena(T*);

+          Overwrites the existing pointer with a newly allocated

+          string hashmap which will automatically allocate each string

+          key to a string arena. Every string key ever used by this

+          hash table remains in the arena until the arena is freed.

+          Additionally, any key which is deleted and reinserted will

+          be allocated multiple times in the string arena.

+NOTES

+  * These data structures are realloc'd when they grow, and the macro

+    "functions" write to the provided pointer. This means: (a) the pointer

+    must be an lvalue, and (b) the pointer to the data structure is not

+    stable, and you must maintain it the same as you would a realloc'd

+    pointer. For example, if you pass a pointer to a dynamic array to a

+    function which updates it, the function must return back the new

+    pointer to the caller. This is the price of trying to do this in C.

+  * The following are the only functions that are thread-safe on a single data

+    structure, i.e. can be run in multiple threads simultaneously on the same

+    data structure

+        hmlen        shlen

+        hmlenu       shlenu

+        hmget_ts     shget_ts

+        hmgeti_ts    shgeti_ts

+        hmgets_ts    shgets_ts

+  * You iterate over the contents of a dynamic array and a hashmap in exactly

+    the same way, using arrlen/hmlen/shlen:

+      for (i=0; i < arrlen(foo); ++i)

+         ... foo[i] ...

+  * All operations except arrins/arrdel are O(1) amortized, but individual

+    operations can be slow, so these data structures may not be suitable

+    for real time use. Dynamic arrays double in capacity as needed, so

+    elements are copied an average of once. Hash tables double/halve

+    their size as needed, with appropriate hysteresis to maintain O(1)

+    performance.

+NOTES - DYNAMIC ARRAY

+  * If you know how long a dynamic array is going to be in advance, you can avoid

+    extra memory allocations by using arrsetlen to allocate it to that length in

+    advance and use foo[n] while filling it out, or arrsetcap to allocate the memory

+    for that length and use arrput/arrpush as normal.

+  * Unlike some other versions of the dynamic array, this version should

+    be safe to use with strict-aliasing optimizations.

+NOTES - HASH MAP

+  * For compilers other than GCC and clang (e.g. Visual Studio), for hmput/hmget/hmdel

+    and variants, the key must be an lvalue (so the macro can take the address of it).

+    Extensions are used that eliminate this requirement if you're using C99 and later

+    in GCC or clang, or if you're using C++ in GCC. But note that this can make your

+    code less portable.

+  * To test for presence of a key in a hashmap, just do 'hmgeti(foo,key) >= 0'.

+  * The iteration order of your data in the hashmap is determined solely by the

+    order of insertions and deletions. In particular, if you never delete, new

+    keys are always added at the end of the array. This will be consistent

+    across all platforms and versions of the library. However, you should not

+    attempt to serialize the internal hash table, as the hash is not consistent

+    between different platforms, and may change with future versions of the library.

+  * Use sh_new_arena() for string hashmaps that you never delete from. Initialize

+    with NULL if you're managing the memory for your strings, or your strings are

+    never freed (at least until the hashmap is freed). Otherwise, use sh_new_strdup().

+    @TODO: make an arena variant that garbage collects the strings with a trivial

+    copy collector into a new arena whenever the table shrinks / rebuilds. Since

+    current arena recommendation is to only use arena if it never deletes, then

+    this can just replace current arena implementation.

+  * If adversarial input is a serious concern and you're on a 64-bit platform,

+    enable STBDS_SIPHASH_2_4 (see the 'Compile-time options' section), and pass

+    a strong random number to stbds_rand_seed.

+  * The default value for the hash table is stored in foo[-1], so if you

+    use code like 'hmgetp(T,k)->value = 5' you can accidentally overwrite

+    the value stored by hmdefault if 'k' is not present.

+CREDITS

+  Sean Barrett -- library, idea for dynamic array API/implementation

+  Per Vognsen  -- idea for hash table API/implementation

+  Rafael Sachetto -- arrpop()

+  github:HeroicKatora -- arraddn() reworking

+  Bugfixes:

+    Andy Durdin

+    Shane Liesegang

+    Vinh Truong

+    Andreas Molzer

+    github:hashitaku

+    github:srdjanstipic

+*/

+#ifdef STBDS_UNIT_TESTS

+#define _CRT_SECURE_NO_WARNINGS

+#endif

+#ifndef INCLUDE_STB_DS_H

+#define INCLUDE_STB_DS_H

+#include <stddef.h>

+#include <string.h>

+#ifndef STBDS_NO_SHORT_NAMES

+#define arrlen      stbds_arrlen

+#define arrlenu     stbds_arrlenu

+#define arrput      stbds_arrput

+#define arrpush     stbds_arrput

+#define arrpop      stbds_arrpop

+#define arrfree     stbds_arrfree

+#define arraddn     stbds_arraddn // deprecated, use one of the following instead:

+#define arraddnptr  stbds_arraddnptr

+#define arraddnindex stbds_arraddnindex

+#define arrsetlen   stbds_arrsetlen

+#define arrlast     stbds_arrlast

+#define arrins      stbds_arrins

+#define arrinsn     stbds_arrinsn

+#define arrdel      stbds_arrdel

+#define arrdeln     stbds_arrdeln

+#define arrdelswap  stbds_arrdelswap

+#define arrcap      stbds_arrcap

+#define arrsetcap   stbds_arrsetcap

+#define hmput       stbds_hmput

+#define hmputs      stbds_hmputs

+#define hmget       stbds_hmget

+#define hmget_ts    stbds_hmget_ts

+#define hmgets      stbds_hmgets

+#define hmgetp      stbds_hmgetp

+#define hmgetp_ts   stbds_hmgetp_ts

+#define hmgetp_null stbds_hmgetp_null

+#define hmgeti      stbds_hmgeti

+#define hmgeti_ts   stbds_hmgeti_ts

+#define hmdel       stbds_hmdel

+#define hmlen       stbds_hmlen

+#define hmlenu      stbds_hmlenu

+#define hmfree      stbds_hmfree

+#define hmdefault   stbds_hmdefault

+#define hmdefaults  stbds_hmdefaults

+#define shput       stbds_shput

+#define shputi      stbds_shputi

+#define shputs      stbds_shputs

+#define shget       stbds_shget

+#define shgeti      stbds_shgeti

+#define shgets      stbds_shgets

+#define shgetp      stbds_shgetp

+#define shgetp_null stbds_shgetp_null

+#define shdel       stbds_shdel

+#define shlen       stbds_shlen

+#define shlenu      stbds_shlenu

+#define shfree      stbds_shfree

+#define shdefault   stbds_shdefault

+#define shdefaults  stbds_shdefaults

+#define sh_new_arena  stbds_sh_new_arena

+#define sh_new_strdup stbds_sh_new_strdup

+#define stralloc    stbds_stralloc

+#define strreset    stbds_strreset

+#endif

+#if defined(STBDS_REALLOC) && !defined(STBDS_FREE) || !defined(STBDS_REALLOC) && defined(STBDS_FREE)

+#error "You must define both STBDS_REALLOC and STBDS_FREE, or neither."

+#endif

+#if !defined(STBDS_REALLOC) && !defined(STBDS_FREE)

+#include <stdlib.h>

+#define STBDS_REALLOC(c,p,s) realloc(p,s)

+#define STBDS_FREE(c,p)      free(p)

+#endif

+#ifdef _MSC_VER

+#define STBDS_NOTUSED(v)  (void)(v)

+#else

+#define STBDS_NOTUSED(v)  (void)sizeof(v)

+#endif

+#ifdef __cplusplus

+extern "C" {

+#endif

+// for security against attackers, seed the library with a random number, at least time() but stronger is better

+extern void stbds_rand_seed(size_t seed);

+// these are the hash functions used internally if you want to test them or use them for other purposes

+extern size_t stbds_hash_bytes(void *p, size_t len, size_t seed);

+extern size_t stbds_hash_string(char *str, size_t seed);

+// this is a simple string arena allocator, initialize with e.g. 'stbds_string_arena my_arena={0}'.

+typedef struct stbds_string_arena stbds_string_arena;

+extern char * stbds_stralloc(stbds_string_arena *a, char *str);

+extern void   stbds_strreset(stbds_string_arena *a);

+// have to #define STBDS_UNIT_TESTS to call this

+extern void stbds_unit_tests(void);

+///////////////

+//

+// Everything below here is implementation details

+//

+extern void * stbds_arrgrowf(void *a, size_t elemsize, size_t addlen, size_t min_cap);

+extern void   stbds_hmfree_func(void *p, size_t elemsize);

+extern void * stbds_hmget_key(void *a, size_t elemsize, void *key, size_t keysize, int mode);

+extern void * stbds_hmget_key_ts(void *a, size_t elemsize, void *key, size_t keysize, ptrdiff_t *temp, int mode);

+extern void * stbds_hmput_default(void *a, size_t elemsize);

+extern void * stbds_hmput_key(void *a, size_t elemsize, void *key, size_t keysize, int mode);

+extern void * stbds_hmdel_key(void *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode);

+extern void * stbds_shmode_func(size_t elemsize, int mode);

+#ifdef __cplusplus

+}

+#endif

+#if defined(__GNUC__) || defined(__clang__)

+#define STBDS_HAS_TYPEOF // gcc/clang: a typeof operator is assumed to be available

+#ifdef __cplusplus

+//#define STBDS_HAS_LITERAL_ARRAY  // this is currently broken for clang

+#endif

+#endif

+#if !defined(__cplusplus)

+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L

+#define STBDS_HAS_LITERAL_ARRAY // C99 or later: compound literals are available

+#endif

+#endif

+// this macro takes the address of the argument, but on gcc/clang can accept rvalues

+#if defined(STBDS_HAS_LITERAL_ARRAY) && defined(STBDS_HAS_TYPEOF)

+  #if __clang__

+  #define STBDS_ADDRESSOF(typevar, value)     ((__typeof__(typevar)[1]){value}) // literal array decays to pointer to value

+  #else

+  #define STBDS_ADDRESSOF(typevar, value)     ((typeof(typevar)[1]){value}) // literal array decays to pointer to value

+  #endif

+#else

+#define STBDS_ADDRESSOF(typevar, value)     &(value) // fallback: value must be an lvalue

+#endif

+#define STBDS_OFFSETOF(var,field)           ((char *) &(var)->field - (char *) (var)) // byte offset of field within *var

+#define stbds_header(t)  ((stbds_array_header *) (t) - 1) // the header sits immediately before element 0

+#define stbds_temp(t)    stbds_header(t)->temp

+#define stbds_temp_key(t) (*(char **) stbds_header(t)->hash_table) // reads the first field of the hash table, i.e. temp_key

+#define stbds_arrsetcap(a,n)  (stbds_arrgrow(a,0,n))

+#define stbds_arrsetlen(a,n)  ((stbds_arrcap(a) < (size_t) (n) ? stbds_arrsetcap((a),(size_t)(n)),0 : 0), (a) ? stbds_header(a)->length = (size_t) (n) : 0) // grows capacity first if n exceeds it, then stores n as the length

+#define stbds_arrcap(a)       ((a) ? stbds_header(a)->capacity : 0) // a NULL array has capacity 0

+#define stbds_arrlen(a)       ((a) ? (ptrdiff_t) stbds_header(a)->length : 0) // signed length; a NULL array has length 0

+#define stbds_arrlenu(a)      ((a) ?             stbds_header(a)->length : 0) // unsigned length; a NULL array has length 0

+#define stbds_arrput(a,v)     (stbds_arrmaybegrow(a,1), (a)[stbds_header(a)->length++] = (v)) // appends v, growing first when needed

+#define stbds_arrpush         stbds_arrput  // synonym

+#define stbds_arrpop(a)       (stbds_header(a)->length--, (a)[stbds_header(a)->length]) // yields the last element; no NULL or empty check is performed

+#define stbds_arraddn(a,n)    ((void)(stbds_arraddnoff(a, n)))    // deprecated, use one of the following instead:

+#define stbds_arraddnptr(a,n) (stbds_arrmaybegrow(a,n), stbds_header(a)->length += (n), &(a)[stbds_header(a)->length-(n)]) // adds n slots, yields a pointer to the first new one

+#define stbds_arraddnoff(a,n) (stbds_arrmaybegrow(a,n), stbds_header(a)->length += (n), stbds_header(a)->length-(n)) // adds n slots, yields the index of the first new one

+#define stbds_arrlast(a)      ((a)[stbds_header(a)->length-1])

+#define stbds_arrfree(a)      ((void) ((a) ? STBDS_FREE(NULL,stbds_header(a)) : (void)0), (a)=NULL) // frees the allocation (which starts at the header) and resets a to NULL

+#define stbds_arrdel(a,i)     stbds_arrdeln(a,i,1)

+#define stbds_arrdeln(a,i,n)  (memmove(&(a)[i], &(a)[(i)+(n)], sizeof *(a) * (stbds_header(a)->length-(n)-(i))), stbds_header(a)->length -= (n)) // closes the gap left by n elements starting at i, preserving order

+#define stbds_arrdelswap(a,i) ((a)[i] = stbds_arrlast(a), stbds_header(a)->length -= 1) // overwrites element i with the last element; order is not preserved

+#define stbds_arrinsn(a,i,n)  (stbds_arraddn((a),(n)), memmove(&(a)[(i)+(n)], &(a)[i], sizeof *(a) * (stbds_header(a)->length-(n)-(i)))) // opens a gap of n slots at i; the gap contents are not initialized

+#define stbds_arrins(a,i,v)   (stbds_arrinsn((a),(i),1), (a)[i]=(v))

+#define stbds_arrmaybegrow(a,n)  ((!(a) || stbds_header(a)->length + (n) > stbds_header(a)->capacity) \

+                                  ? (stbds_arrgrow(a,n,0),0) : 0)

+#define stbds_arrgrow(a,b,c)   ((a) = stbds_arrgrowf_wrapper((a), sizeof *(a), (b), (c))) // reassigns a because the array may move

+#define stbds_hmput(t, k, v) \

+    ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) STBDS_ADDRESSOF((t)->key, (k)), sizeof (t)->key, 0),   \

+     (t)[stbds_temp((t)-1)].key = (k),    \

+     (t)[stbds_temp((t)-1)].value = (v)) // the entry index is read back from the header temp of the raw array (t)-1

+#define stbds_hmputs(t, s) \

+    ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), &(s).key, sizeof (s).key, STBDS_HM_BINARY), \

+     (t)[stbds_temp((t)-1)] = (s)) // stores the whole struct s, keyed by s.key

+#define stbds_hmgeti(t,k) \

+    ((t) = stbds_hmget_key_wrapper((t), sizeof *(t), (void*) STBDS_ADDRESSOF((t)->key, (k)), sizeof (t)->key, STBDS_HM_BINARY), \

+      stbds_temp((t)-1)) // evaluates to the index of k, or -1 when k is not present

+#define stbds_hmgeti_ts(t,k,temp) \

+    ((t) = stbds_hmget_key_ts_wrapper((t), sizeof *(t), (void*) STBDS_ADDRESSOF((t)->key, (k)), sizeof (t)->key, &(temp), STBDS_HM_BINARY), \

+      (temp)) // same as stbds_hmgeti, but the index is returned through the caller's temp variable

+#define stbds_hmgetp(t, k) \

+    ((void) stbds_hmgeti(t,k), &(t)[stbds_temp((t)-1)]) // pointer to the entry; when k is absent the index is -1, i.e. the default element

+#define stbds_hmgetp_ts(t, k, temp) \

+    ((void) stbds_hmgeti_ts(t,k,temp), &(t)[temp])

+#define stbds_hmdel(t,k) \

+    (((t) = stbds_hmdel_key_wrapper((t),sizeof *(t), (void*) STBDS_ADDRESSOF((t)->key, (k)), sizeof (t)->key, STBDS_OFFSETOF((t),key), STBDS_HM_BINARY)),(t)?stbds_temp((t)-1):0)

+#define stbds_hmdefault(t, v) \

+    ((t) = stbds_hmput_default_wrapper((t), sizeof *(t)), (t)[-1].value = (v)) // the default element lives at index -1

+#define stbds_hmdefaults(t, s) \

+    ((t) = stbds_hmput_default_wrapper((t), sizeof *(t)), (t)[-1] = (s))

+#define stbds_hmfree(p)        \

+    ((void) ((p) != NULL ? stbds_hmfree_func((p)-1,sizeof*(p)),0 : 0),(p)=NULL) // passes the raw array (p)-1 and resets p to NULL

+#define stbds_hmgets(t, k)    (*stbds_hmgetp(t,k))

+#define stbds_hmget(t, k)     (stbds_hmgetp(t,k)->value)

+#define stbds_hmget_ts(t, k, temp)  (stbds_hmgetp_ts(t,k,temp)->value)

+#define stbds_hmlen(t)        ((t) ? (ptrdiff_t) stbds_header((t)-1)->length-1 : 0) // the raw length includes the default element, hence the -1

+#define stbds_hmlenu(t)       ((t) ?             stbds_header((t)-1)->length-1 : 0) // unsigned variant of stbds_hmlen

+#define stbds_hmgetp_null(t,k)  (stbds_hmgeti(t,k) == -1 ? NULL : &(t)[stbds_temp((t)-1)]) // NULL when k is absent, else a pointer to its entry; the index is read from the header of the raw array (t)-1, where the lookup stored it

+#define stbds_shput(t, k, v) \

+    ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_HM_STRING),   \

+     (t)[stbds_temp((t)-1)].value = (v)) // only the value is written here; the key field is not assigned by this macro

+#define stbds_shputi(t, k, v) \

+    ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_HM_STRING),   \

+     (t)[stbds_temp((t)-1)].value = (v), stbds_temp((t)-1)) // like stbds_shput, but evaluates to the entry index

+#define stbds_shputs(t, s) \

+    ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (s).key, sizeof (s).key, STBDS_HM_STRING), \

+     (t)[stbds_temp((t)-1)] = (s), \

+     (t)[stbds_temp((t)-1)].key = stbds_temp_key((t)-1)) // above line overwrites whole structure, so must rewrite key here if it was allocated internally

+#define stbds_pshput(t, p) \

+    ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (p)->key, sizeof (p)->key, STBDS_HM_PTR_TO_STRING), \

+     (t)[stbds_temp((t)-1)] = (p)) // stores the pointer p itself, keyed by p->key

+#define stbds_shgeti(t,k) \

+     ((t) = stbds_hmget_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_HM_STRING), \

+      stbds_temp((t)-1)) // evaluates to the index of string key k, or -1 when absent

+#define stbds_pshgeti(t,k) \

+     ((t) = stbds_hmget_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (*(t))->key, STBDS_HM_PTR_TO_STRING), \

+      stbds_temp((t)-1))

+#define stbds_shgetp(t, k) \

+    ((void) stbds_shgeti(t,k), &(t)[stbds_temp((t)-1)])

+#define stbds_pshget(t, k) \

+    ((void) stbds_pshgeti(t,k), (t)[stbds_temp((t)-1)])

+#define stbds_shdel(t,k) \

+    (((t) = stbds_hmdel_key_wrapper((t),sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_OFFSETOF((t),key), STBDS_HM_STRING)),(t)?stbds_temp((t)-1):0)

+#define stbds_pshdel(t,k) \

+    (((t) = stbds_hmdel_key_wrapper((t),sizeof *(t), (void*) (k), sizeof (*(t))->key, STBDS_OFFSETOF(*(t),key), STBDS_HM_PTR_TO_STRING)),(t)?stbds_temp((t)-1):0)

+#define stbds_sh_new_arena(t)  \

+    ((t) = stbds_shmode_func_wrapper(t, sizeof *(t), STBDS_SH_ARENA))

+#define stbds_sh_new_strdup(t) \

+    ((t) = stbds_shmode_func_wrapper(t, sizeof *(t), STBDS_SH_STRDUP))

+#define stbds_shdefault(t, v)  stbds_hmdefault(t,v)

+#define stbds_shdefaults(t, s) stbds_hmdefaults(t,s)

+#define stbds_shfree       stbds_hmfree

+#define stbds_shlenu       stbds_hmlenu

+#define stbds_shgets(t, k) (*stbds_shgetp(t,k))

+#define stbds_shget(t, k)  (stbds_shgetp(t,k)->value)

+#define stbds_shgetp_null(t,k)  (stbds_shgeti(t,k) == -1 ? NULL : &(t)[stbds_temp((t)-1)]) // NULL when k is absent, else a pointer to its entry; the index is read from the header of the raw array (t)-1, where the lookup stored it

+#define stbds_shlen        stbds_hmlen // string-keyed maps reuse the hm length macro

+typedef struct

+{

+  size_t      length; // number of elements currently stored

+  size_t      capacity; // number of elements the allocation has room for

+  void      * hash_table; // hash index for hash maps; set to 0 when the array is first allocated

+  ptrdiff_t   temp; // scratch slot: hash-map lookups leave the found element index (or -1) here

+} stbds_array_header;

+typedef struct stbds_string_block

+{

+  struct stbds_string_block *next; // link to another block; how the chain is built is not visible in this section

+  char storage[8]; // NOTE(review): presumably the start of a larger variable-sized buffer — confirm in stbds_stralloc

+} stbds_string_block;

+struct stbds_string_arena

+{

+  stbds_string_block *storage; // block list owned by the arena

+  size_t remaining; // NOTE(review): presumably bytes left in the current block — confirm in stbds_stralloc

+  unsigned char block;

+  unsigned char mode;  // this isn't used by the string arena itself

+};

+#define STBDS_HM_BINARY         0 // keys are compared with memcmp over keysize bytes

+#define STBDS_HM_STRING         1 // keys are char* compared with strcmp (applies to any mode >= this value); NOTE(review): STBDS_HM_PTR_TO_STRING, used by the stbds_psh* macros, is not defined in this section — confirm it is defined elsewhere

+enum

+{

+   STBDS_SH_NONE,

+   STBDS_SH_DEFAULT, // assigned by stbds_hmput_key when a string-keyed table is first created

+   STBDS_SH_STRDUP, // keys are freed one by one in stbds_hmfree_func

+   STBDS_SH_ARENA

+};

+#ifdef __cplusplus

+// in C we use implicit assignment from these void*-returning functions to T*.

+// in C++ these templates make the same code work

+template<class T> static T * stbds_arrgrowf_wrapper(T *a, size_t elemsize, size_t addlen, size_t min_cap) {

+  return (T*)stbds_arrgrowf((void *)a, elemsize, addlen, min_cap);

+}

+template<class T> static T * stbds_hmget_key_wrapper(T *a, size_t elemsize, void *key, size_t keysize, int mode) {

+  return (T*)stbds_hmget_key((void*)a, elemsize, key, keysize, mode);

+}

+template<class T> static T * stbds_hmget_key_ts_wrapper(T *a, size_t elemsize, void *key, size_t keysize, ptrdiff_t *temp, int mode) {

+  return (T*)stbds_hmget_key_ts((void*)a, elemsize, key, keysize, temp, mode);

+}

+template<class T> static T * stbds_hmput_default_wrapper(T *a, size_t elemsize) {

+  return (T*)stbds_hmput_default((void *)a, elemsize);

+}

+template<class T> static T * stbds_hmput_key_wrapper(T *a, size_t elemsize, void *key, size_t keysize, int mode) {

+  return (T*)stbds_hmput_key((void*)a, elemsize, key, keysize, mode);

+}

+template<class T> static T * stbds_hmdel_key_wrapper(T *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode){

+  return (T*)stbds_hmdel_key((void*)a, elemsize, key, keysize, keyoffset, mode);

+}

+template<class T> static T * stbds_shmode_func_wrapper(T *, size_t elemsize, int mode) { // the pointer argument only supplies T; its value is unused

+  return (T*)stbds_shmode_func(elemsize, mode);

+}

+#else

+#define stbds_arrgrowf_wrapper            stbds_arrgrowf // in C the wrappers are plain aliases for the void* functions

+#define stbds_hmget_key_wrapper           stbds_hmget_key

+#define stbds_hmget_key_ts_wrapper        stbds_hmget_key_ts

+#define stbds_hmput_default_wrapper       stbds_hmput_default

+#define stbds_hmput_key_wrapper           stbds_hmput_key

+#define stbds_hmdel_key_wrapper           stbds_hmdel_key

+#define stbds_shmode_func_wrapper(t,e,m)  stbds_shmode_func(e,m) // drops the first argument, which is not needed in C

+#endif

+#endif // INCLUDE_STB_DS_H

+//////////////////////////////////////////////////////////////////////////////

+//

+//   IMPLEMENTATION

+//

+#ifdef STB_DS_IMPLEMENTATION

+#include <assert.h>

+#include <string.h>

+#ifndef STBDS_ASSERT

+#define STBDS_ASSERT_WAS_UNDEFINED

+#define STBDS_ASSERT(x)   ((void) 0) // assertions compile to nothing unless the user supplies STBDS_ASSERT

+#endif

+#ifdef STBDS_STATISTICS

+#define STBDS_STATS(x)   x // statistics enabled: the counter update is emitted as written

+size_t stbds_array_grow;

+size_t stbds_hash_grow;

+size_t stbds_hash_shrink;

+size_t stbds_hash_rebuild;

+size_t stbds_hash_probes;

+size_t stbds_hash_alloc;

+size_t stbds_rehash_probes;

+size_t stbds_rehash_items;

+#else

+#define STBDS_STATS(x) // statistics disabled: the argument is discarded

+#endif

+//

+// stbds_arr implementation

+//

+//int *prev_allocs[65536];

+//int num_prev;

+void *stbds_arrgrowf(void *a, size_t elemsize, size_t addlen, size_t min_cap)

+{ // makes room for stbds_arrlen(a)+addlen and at least min_cap elements; returns the pointer to element 0, which may have moved

+  void *b;

+  size_t min_len = stbds_arrlen(a) + addlen;

+  // compute the minimum capacity needed

+  if (min_len > min_cap)

+    min_cap = min_len;

+  if (min_cap <= stbds_arrcap(a))

+    return a; // already large enough, nothing to do

+  // increase needed capacity to guarantee O(1) amortized

+  if (min_cap < 2 * stbds_arrcap(a))

+    min_cap = 2 * stbds_arrcap(a);

+  else if (min_cap < 4)

+    min_cap = 4;

+  //if (num_prev < 65536) if (a) prev_allocs[num_prev++] = (int *) ((char *) a+1);

+  //if (num_prev == 2201)

+  //  num_prev = num_prev;

+  b = STBDS_REALLOC(NULL, (a) ? stbds_header(a) : 0, elemsize * min_cap + sizeof(stbds_array_header)); // NOTE(review): the result is not checked for NULL and the size is not checked for overflow

+  //if (num_prev < 65536) prev_allocs[num_prev++] = (int *) (char *) b;

+  b = (char *) b + sizeof(stbds_array_header); // skip past the header so b points at element 0

+  if (a == NULL) {

+    stbds_header(b)->length = 0; // fresh allocation: initialize the header (temp is not set here)

+    stbds_header(b)->hash_table = 0;

+  } else {

+    STBDS_STATS(++stbds_array_grow);

+  }

+  stbds_header(b)->capacity = min_cap;

+  return b;

+}

+//

+// stbds_hm hash table implementation

+//

+#ifdef STBDS_INTERNAL_SMALL_BUCKET

+#define STBDS_BUCKET_LENGTH      4

+#else

+#define STBDS_BUCKET_LENGTH      8

+#endif

+#define STBDS_BUCKET_SHIFT      (STBDS_BUCKET_LENGTH == 8 ? 3 : 2) // log2 of STBDS_BUCKET_LENGTH

+#define STBDS_BUCKET_MASK       (STBDS_BUCKET_LENGTH-1) // extracts the slot-within-bucket bits of a position

+#define STBDS_CACHE_LINE_SIZE   64

+#define STBDS_ALIGN_FWD(n,a)   (((n) + (a) - 1) & ~((a)-1)) // rounds n up to a multiple of a; a must be a power of two

+typedef struct

+{

+   size_t    hash [STBDS_BUCKET_LENGTH]; // stored hash per slot; STBDS_HASH_EMPTY (0) marks an empty slot

+   ptrdiff_t index[STBDS_BUCKET_LENGTH]; // element index per slot; negative values are the STBDS_INDEX_EMPTY / STBDS_INDEX_DELETED markers

+} stbds_hash_bucket; // in 32-bit, this is one 64-byte cache line; in 64-bit, each array is one 64-byte cache line

+typedef struct

+{

+  char * temp_key; // this MUST be the first field of the hash table

+  size_t slot_count; // total number of slots; positions are masked with slot_count-1

+  size_t used_count;

+  size_t used_count_threshold; // stbds_hmput_key builds a larger index once used_count reaches this

+  size_t used_count_shrink_threshold;

+  size_t tombstone_count;

+  size_t tombstone_count_threshold;

+  size_t seed; // per-table hash seed, carried over when the index is rebuilt from an old one

+  size_t slot_count_log2; // stbds_log2(slot_count)

+  stbds_string_arena string;

+  stbds_hash_bucket *storage; // not a separate allocation, just 64-byte aligned storage after this struct

+} stbds_hash_index;

+#define STBDS_INDEX_EMPTY    -1 // index value of a slot that holds nothing; also the "not found" result of lookups

+#define STBDS_INDEX_DELETED  -2

+#define STBDS_INDEX_IN_USE(x)  ((x) >= 0) // both marker values above are negative

+#define STBDS_HASH_EMPTY      0 // real hashes are bumped to >= 2 before being stored, so 0 and 1 are free to act as markers

+#define STBDS_HASH_DELETED    1

+static size_t stbds_hash_seed=0x31415926; // seed handed to the next hash index that is created from scratch

+void stbds_rand_seed(size_t seed)

+{ // replaces the global seed; hash indexes that already exist keep the seed they were created with

+  stbds_hash_seed = seed;

+}

+#define stbds_load_32_or_64(var, temp, v32, v64_hi, v64_lo)                                          \

+  temp = v64_lo ^ v32, temp <<= 16, temp <<= 16, temp >>= 16, temp >>= 16, /* discard if 32-bit */   \

+  var = v64_hi, var <<= 16, var <<= 16,                                    /* discard if 32-bit */   \

+  var ^= temp ^ v32 // net effect: var is v32 when size_t has 32 bits, and (v64_hi << 32) | v64_lo when it has 64

+#define STBDS_SIZE_T_BITS           ((sizeof (size_t)) * 8) // assumes 8-bit bytes

+static size_t stbds_probe_position(size_t hash, size_t slot_count, size_t slot_log2)

+{ // maps a hash to the slot where probing starts; relies on slot_count being a power of two

+  size_t pos;

+  STBDS_NOTUSED(slot_log2); // parameter is accepted but not used by this implementation

+  pos = hash & (slot_count-1);

+  #ifdef STBDS_INTERNAL_BUCKET_START

+  pos &= ~STBDS_BUCKET_MASK; // optional: always start at the first slot of the bucket

+  #endif

+  return pos;

+}

+static size_t stbds_log2(size_t slot_count)

+{ // returns floor(log2(slot_count)); the result is 0 for inputs 0 and 1

+  size_t n=0;

+  while (slot_count > 1) { // count how many times the value can be halved

+    slot_count >>= 1;

+    ++n;

+  }

+  return n;

+}

+static stbds_hash_index *stbds_make_hash_index(size_t slot_count, stbds_hash_index *ot)

+{ // allocates an empty index with slot_count slots; when ot is non-NULL its seed, string arena and in-use entries are carried over (ot itself is not freed here)

+  stbds_hash_index *t;

+  t = (stbds_hash_index *) STBDS_REALLOC(NULL,0,(slot_count >> STBDS_BUCKET_SHIFT) * sizeof(stbds_hash_bucket) + sizeof(stbds_hash_index) + STBDS_CACHE_LINE_SIZE-1); // one allocation: index struct + alignment slack + buckets; NOTE(review): the result is not checked for NULL

+  t->storage = (stbds_hash_bucket *) STBDS_ALIGN_FWD((size_t) (t+1), STBDS_CACHE_LINE_SIZE); // buckets start at the first cache-line boundary after the struct

+  t->slot_count = slot_count;

+  t->slot_count_log2 = stbds_log2(slot_count);

+  t->tombstone_count = 0;

+  t->used_count = 0;

+  #if 0 // A1

+  t->used_count_threshold        = slot_count*12/16; // if 12/16th of table is occupied, grow

+  t->tombstone_count_threshold   = slot_count* 2/16; // if tombstones are 2/16th of table, rebuild

+  t->used_count_shrink_threshold = slot_count* 4/16; // if table is only 4/16th full, shrink

+  #elif 1 // A2

+  //t->used_count_threshold        = slot_count*12/16; // if 12/16th of table is occupied, grow

+  //t->tombstone_count_threshold   = slot_count* 3/16; // if tombstones are 3/16th of table, rebuild

+  //t->used_count_shrink_threshold = slot_count* 4/16; // if table is only 4/16th full, shrink

+  // compute without overflowing

+  t->used_count_threshold        = slot_count - (slot_count>>2);

+  t->tombstone_count_threshold   = (slot_count>>3) + (slot_count>>4);

+  t->used_count_shrink_threshold = slot_count >> 2;

+  #elif 0 // B1

+  t->used_count_threshold        = slot_count*13/16; // if 13/16th of table is occupied, grow

+  t->tombstone_count_threshold   = slot_count* 2/16; // if tombstones are 2/16th of table, rebuild

+  t->used_count_shrink_threshold = slot_count* 5/16; // if table is only 5/16th full, shrink

+  #else // C1

+  t->used_count_threshold        = slot_count*14/16; // if 14/16th of table is occupied, grow

+  t->tombstone_count_threshold   = slot_count* 2/16; // if tombstones are 2/16th of table, rebuild

+  t->used_count_shrink_threshold = slot_count* 6/16; // if table is only 6/16th full, shrink

+  #endif

+  // Following statistics were measured on a Core i7-6700 @ 4.00Ghz, compiled with clang 7.0.1 -O2

+    // Note that the larger tables have high variance as they were run fewer times

+  //     A1            A2          B1           C1

+  //    0.10ms :     0.10ms :     0.10ms :     0.11ms :      2,000 inserts creating 2K table

+  //    0.96ms :     0.95ms :     0.97ms :     1.04ms :     20,000 inserts creating 20K table

+  //   14.48ms :    14.46ms :    10.63ms :    11.00ms :    200,000 inserts creating 200K table

+  //  195.74ms :   196.35ms :   203.69ms :   214.92ms :  2,000,000 inserts creating 2M table

+  // 2193.88ms :  2209.22ms :  2285.54ms :  2437.17ms : 20,000,000 inserts creating 20M table

+  //   65.27ms :    53.77ms :    65.33ms :    65.47ms : 500,000 inserts & deletes in 2K table

+  //   72.78ms :    62.45ms :    71.95ms :    72.85ms : 500,000 inserts & deletes in 20K table

+  //   89.47ms :    77.72ms :    96.49ms :    96.75ms : 500,000 inserts & deletes in 200K table

+  //   97.58ms :    98.14ms :    97.18ms :    97.53ms : 500,000 inserts & deletes in 2M table

+  //  118.61ms :   119.62ms :   120.16ms :   118.86ms : 500,000 inserts & deletes in 20M table

+  //  192.11ms :   194.39ms :   196.38ms :   195.73ms : 500,000 inserts & deletes in 200M table

+  if (slot_count <= STBDS_BUCKET_LENGTH)

+    t->used_count_shrink_threshold = 0; // a single-bucket table is never shrunk

+  // to avoid infinite loop, we need to guarantee that at least one slot is empty and will terminate probes

+  STBDS_ASSERT(t->used_count_threshold + t->tombstone_count_threshold < t->slot_count);

+  STBDS_STATS(++stbds_hash_alloc);

+  if (ot) {

+    t->string = ot->string;

+    // reuse old seed so we can reuse old hashes so below "copy out old data" doesn't do any hashing

+    t->seed = ot->seed;

+  } else {

+    size_t a,b,temp;

+    memset(&t->string, 0, sizeof(t->string));

+    t->seed = stbds_hash_seed;

+    // LCG

+    // in 32-bit, a =          2147001325   b =  715136305

+    // in 64-bit, a = 2862933555777941757   b = 3037000493

+    stbds_load_32_or_64(a,temp, 2147001325, 0x27bb2ee6, 0x87b0b0fd);

+    stbds_load_32_or_64(b,temp,  715136305,          0, 0xb504f32d);

+    stbds_hash_seed = stbds_hash_seed  * a + b; // advance the global seed so the next fresh table gets a different one

+  }

+  {

+    size_t i,j;

+    for (i=0; i < slot_count >> STBDS_BUCKET_SHIFT; ++i) { // mark every slot of every bucket as empty

+      stbds_hash_bucket *b = &t->storage[i];

+      for (j=0; j < STBDS_BUCKET_LENGTH; ++j)

+        b->hash[j] = STBDS_HASH_EMPTY;

+      for (j=0; j < STBDS_BUCKET_LENGTH; ++j)

+        b->index[j] = STBDS_INDEX_EMPTY;

+    }

+  }

+  // copy out the old data, if any

+  if (ot) {

+    size_t i,j;

+    t->used_count = ot->used_count;

+    for (i=0; i < ot->slot_count >> STBDS_BUCKET_SHIFT; ++i) {

+      stbds_hash_bucket *ob = &ot->storage[i];

+      for (j=0; j < STBDS_BUCKET_LENGTH; ++j) {

+        if (STBDS_INDEX_IN_USE(ob->index[j])) { // empty and deleted slots are not carried over

+          size_t hash = ob->hash[j];

+          size_t pos = stbds_probe_position(hash, t->slot_count, t->slot_count_log2);

+          size_t step = STBDS_BUCKET_LENGTH;

+          STBDS_STATS(++stbds_rehash_items);

+          for (;;) {

+            size_t limit,z;

+            stbds_hash_bucket *bucket;

+            bucket = &t->storage[pos >> STBDS_BUCKET_SHIFT];

+            STBDS_STATS(++stbds_rehash_probes);

+            for (z=pos & STBDS_BUCKET_MASK; z < STBDS_BUCKET_LENGTH; ++z) { // scan from pos to the end of the bucket

+              if (bucket->hash[z] == 0) { // 0 is STBDS_HASH_EMPTY

+                bucket->hash[z] = hash;

+                bucket->index[z] = ob->index[j];

+                goto done;

+              }

+            }

+            limit = pos & STBDS_BUCKET_MASK;

+            for (z = 0; z < limit; ++z) { // then wrap around to the slots before pos in the same bucket

+              if (bucket->hash[z] == 0) {

+                bucket->hash[z] = hash;

+                bucket->index[z] = ob->index[j];

+                goto done;

+              }

+            }

+            pos += step;                  // quadratic probing

+            step += STBDS_BUCKET_LENGTH;

+            pos &= (t->slot_count-1);

+          }

+        }

+       done:

+        ;

+      }

+    }

+  }

+  return t;

+}

+#define STBDS_ROTATE_LEFT(val, n)   (((val) << (n)) | ((val) >> (STBDS_SIZE_T_BITS - (n)))) // n must be strictly between 0 and STBDS_SIZE_T_BITS, otherwise one of the shifts is by the full width

+#define STBDS_ROTATE_RIGHT(val, n)  (((val) >> (n)) | ((val) << (STBDS_SIZE_T_BITS - (n)))) // same restriction on n as STBDS_ROTATE_LEFT

+size_t stbds_hash_string(char *str, size_t seed)

+{ // hashes the NUL-terminated string str, mixed with seed; str must not be NULL

+  size_t hash = seed;

+  while (*str)

+     hash = STBDS_ROTATE_LEFT(hash, 9) + (unsigned char) *str++; // fold in one byte at a time, treated as unsigned

+  // Thomas Wang 64-to-32 bit mix function, hopefully also works in 32 bits

+  hash ^= seed;

+  hash = (~hash) + (hash << 18);

+  hash ^= hash ^ STBDS_ROTATE_RIGHT(hash,31); // note: hash ^= hash ^ x leaves hash equal to x, i.e. just the rotated value

+  hash = hash * 21;

+  hash ^= hash ^ STBDS_ROTATE_RIGHT(hash,11); // same cancellation as above: hash becomes the rotated value

+  hash += (hash << 6);

+  hash ^= STBDS_ROTATE_RIGHT(hash,22);

+  return hash+seed;

+}

+#ifdef STBDS_SIPHASH_2_4

+#define STBDS_SIPHASH_C_ROUNDS 2

+#define STBDS_SIPHASH_D_ROUNDS 4

+typedef int STBDS_SIPHASH_2_4_can_only_be_used_in_64_bit_builds[sizeof(size_t) == 8 ? 1 : -1]; // compile-time check: a negative array size is an error when size_t is not 8 bytes

+#endif

+#ifndef STBDS_SIPHASH_C_ROUNDS

+#define STBDS_SIPHASH_C_ROUNDS 1 // default number of rounds per input word

+#endif

+#ifndef STBDS_SIPHASH_D_ROUNDS

+#define STBDS_SIPHASH_D_ROUNDS 1 // default number of finalization rounds

+#endif

+#ifdef _MSC_VER

+#pragma warning(push)

+#pragma warning(disable:4127) // conditional expression is constant, for do..while(0) and sizeof()==

+#endif

+static size_t stbds_siphash_bytes(void *p, size_t len, size_t seed)

+{ // hashes the len bytes at p with a SipHash-derived function operating on size_t-wide state

+  unsigned char *d = (unsigned char *) p;

+  size_t i,j;

+  size_t v0,v1,v2,v3, data;

+  // hash that works on 32- or 64-bit registers without knowing which we have

+  // (computes different results on 32-bit and 64-bit platform)

+  // derived from siphash, but on 32-bit platforms very different as it uses 4 32-bit state not 4 64-bit

+  v0 = ((((size_t) 0x736f6d65 << 16) << 16) + 0x70736575) ^  seed;

+  v1 = ((((size_t) 0x646f7261 << 16) << 16) + 0x6e646f6d) ^ ~seed;

+  v2 = ((((size_t) 0x6c796765 << 16) << 16) + 0x6e657261) ^  seed;

+  v3 = ((((size_t) 0x74656462 << 16) << 16) + 0x79746573) ^ ~seed;

+  #ifdef STBDS_TEST_SIPHASH_2_4

+  // hardcoded with key material in the siphash test vectors

+  v0 ^= 0x0706050403020100ull ^  seed;

+  v1 ^= 0x0f0e0d0c0b0a0908ull ^ ~seed;

+  v2 ^= 0x0706050403020100ull ^  seed;

+  v3 ^= 0x0f0e0d0c0b0a0908ull ^ ~seed;

+  #endif

+  #define STBDS_SIPROUND() \

+    do {                   \

+      v0 += v1; v1 = STBDS_ROTATE_LEFT(v1, 13);  v1 ^= v0; v0 = STBDS_ROTATE_LEFT(v0,STBDS_SIZE_T_BITS/2); \

+      v2 += v3; v3 = STBDS_ROTATE_LEFT(v3, 16);  v3 ^= v2;                                                 \

+      v2 += v1; v1 = STBDS_ROTATE_LEFT(v1, 17);  v1 ^= v2; v2 = STBDS_ROTATE_LEFT(v2,STBDS_SIZE_T_BITS/2); \

+      v0 += v3; v3 = STBDS_ROTATE_LEFT(v3, 21);  v3 ^= v0;                                                 \

+    } while (0)

+  for (i=0; i+sizeof(size_t) <= len; i += sizeof(size_t), d += sizeof(size_t)) { // consume the input one full size_t word at a time

+    data = d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24); // NOTE(review): operands are promoted to int, so d[3] >= 0x80 overflows int and may sign-extend into the upper half of data — confirm whether this is intended

+    data |= (size_t) (d[4] | (d[5] << 8) | (d[6] << 16) | (d[7] << 24)) << 16 << 16; // discarded if size_t == 4

+    v3 ^= data;

+    for (j=0; j < STBDS_SIPHASH_C_ROUNDS; ++j)

+      STBDS_SIPROUND();

+    v0 ^= data;

+  }

+  data = len << (STBDS_SIZE_T_BITS-8); // the final word carries the low 8 bits of len in its top byte

+  switch (len - i) { // fold in the remaining tail bytes (fewer than sizeof(size_t))

+    case 7: data |= ((size_t) d[6] << 24) << 24; // fall through

+    case 6: data |= ((size_t) d[5] << 20) << 20; // fall through

+    case 5: data |= ((size_t) d[4] << 16) << 16; // fall through

+    case 4: data |= (d[3] << 24); // fall through

+    case 3: data |= (d[2] << 16); // fall through

+    case 2: data |= (d[1] << 8); // fall through

+    case 1: data |= d[0]; // fall through

+    case 0: break;

+  }

+  v3 ^= data;

+  for (j=0; j < STBDS_SIPHASH_C_ROUNDS; ++j)

+    STBDS_SIPROUND();

+  v0 ^= data;

+  v2 ^= 0xff;

+  for (j=0; j < STBDS_SIPHASH_D_ROUNDS; ++j)

+    STBDS_SIPROUND();

+#ifdef STBDS_SIPHASH_2_4

+  return v0^v1^v2^v3;

+#else

+  return v1^v2^v3; // slightly stronger since v0^v3 in above cancels out final round operation? I tweeted at the authors of SipHash about this but they didn't reply

+#endif

+}

+size_t stbds_hash_bytes(void *p, size_t len, size_t seed)

+{ // hashes len bytes at p: dedicated fast paths for 4-byte keys and (on 64-bit size_t) 8-byte keys, siphash-derived hash otherwise

+#ifdef STBDS_SIPHASH_2_4

+  return stbds_siphash_bytes(p,len,seed);

+#else

+  unsigned char *d = (unsigned char *) p;

+  if (len == 4) {

+    unsigned int hash = d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24); // assemble the 4 key bytes with d[0] as the least significant byte

+    #if 0

+    // HASH32-A  Bob Jenkin's hash function w/o large constants

+    hash ^= seed;

+    hash -= (hash<<6);

+    hash ^= (hash>>17);

+    hash -= (hash<<9);

+    hash ^= seed;

+    hash ^= (hash<<4);

+    hash -= (hash<<3);

+    hash ^= (hash<<10);

+    hash ^= (hash>>15);

+    #elif 1

+    // HASH32-BB  Bob Jenkin's presumably-accidental version of Thomas Wang hash with rotates turned into shifts.

+    // Note that converting these back to rotates makes it run a lot slower, presumably due to collisions, so I'm

+    // not really sure what's going on.

+    hash ^= seed;

+    hash = (hash ^ 61) ^ (hash >> 16);

+    hash = hash + (hash << 3);

+    hash = hash ^ (hash >> 4);

+    hash = hash * 0x27d4eb2d;

+    hash ^= seed;

+    hash = hash ^ (hash >> 15);

+    #else  // HASH32-C   -  Murmur3

+    hash ^= seed;

+    hash *= 0xcc9e2d51;

+    hash = (hash << 17) | (hash >> 15);

+    hash *= 0x1b873593;

+    hash ^= seed;

+    hash = (hash << 19) | (hash >> 13);

+    hash = hash*5 + 0xe6546b64;

+    hash ^= hash >> 16;

+    hash *= 0x85ebca6b;

+    hash ^= seed;

+    hash ^= hash >> 13;

+    hash *= 0xc2b2ae35;

+    hash ^= hash >> 16;

+    #endif

+    // Following statistics were measured on a Core i7-6700 @ 4.00Ghz, compiled with clang 7.0.1 -O2

+    // Note that the larger tables have high variance as they were run fewer times

+    //  HASH32-A   //  HASH32-BB  //  HASH32-C

+    //    0.10ms   //    0.10ms   //    0.10ms :      2,000 inserts creating 2K table

+    //    0.96ms   //    0.95ms   //    0.99ms :     20,000 inserts creating 20K table

+    //   14.69ms   //   14.43ms   //   14.97ms :    200,000 inserts creating 200K table

+    //  199.99ms   //  195.36ms   //  202.05ms :  2,000,000 inserts creating 2M table

+    // 2234.84ms   // 2187.74ms   // 2240.38ms : 20,000,000 inserts creating 20M table

+    //   55.68ms   //   53.72ms   //   57.31ms : 500,000 inserts & deletes in 2K table

+    //   63.43ms   //   61.99ms   //   65.73ms : 500,000 inserts & deletes in 20K table

+    //   80.04ms   //   77.96ms   //   81.83ms : 500,000 inserts & deletes in 200K table

+    //  100.42ms   //   97.40ms   //  102.39ms : 500,000 inserts & deletes in 2M table

+    //  119.71ms   //  120.59ms   //  121.63ms : 500,000 inserts & deletes in 20M table

+    //  185.28ms   //  195.15ms   //  187.74ms : 500,000 inserts & deletes in 200M table

+    //   15.58ms   //   14.79ms   //   15.52ms : 200,000 inserts creating 200K table with varying key spacing

+    return (((size_t) hash << 16 << 16) | hash) ^ seed; // on 64-bit size_t the 32-bit hash is duplicated into the upper half before the final seed mix

+  } else if (len == 8 && sizeof(size_t) == 8) {

+    size_t hash = d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24); // NOTE(review): operands are promoted to int, so d[3] >= 0x80 overflows int and may sign-extend into the upper half of hash — confirm whether this is intended

+    hash |= (size_t) (d[4] | (d[5] << 8) | (d[6] << 16) | (d[7] << 24)) << 16 << 16; // avoid warning if size_t == 4

+    hash ^= seed;

+    hash = (~hash) + (hash << 21);

+    hash ^= STBDS_ROTATE_RIGHT(hash,24);

+    hash *= 265;

+    hash ^= STBDS_ROTATE_RIGHT(hash,14);

+    hash ^= seed;

+    hash *= 21;

+    hash ^= STBDS_ROTATE_RIGHT(hash,28);

+    hash += (hash << 31);

+    hash = (~hash) + (hash << 18);

+    return hash;

+  } else {

+    return stbds_siphash_bytes(p,len,seed); // every other key size goes through the general byte hash

+  }

+#endif

+}

+#ifdef _MSC_VER

+#pragma warning(pop)

+#endif

+static int stbds_is_key_equal(void *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode, size_t i)

+{ // returns nonzero when key matches the key stored in element i of a (the key field sits keyoffset bytes into the element)

+  if (mode >= STBDS_HM_STRING)

+    return 0==strcmp((char *) key, * (char **) ((char *) a + elemsize*i + keyoffset)); // string modes: the element stores a char* that is dereferenced and compared by content

+  else

+    return 0==memcmp(key, (char *) a + elemsize*i + keyoffset, keysize); // binary mode: compare the raw key bytes

+}

+#define STBDS_HASH_TO_ARR(x,elemsize) ((char*) (x) - (elemsize)) // hash-map pointer -> raw array pointer (steps back over the default element)

+#define STBDS_ARR_TO_HASH(x,elemsize) ((char*) (x) + (elemsize)) // raw array pointer -> hash-map pointer (steps past the default element)

+#define stbds_hash_table(a)  ((stbds_hash_index *) stbds_header(a)->hash_table) // a must be the raw array pointer

+void stbds_hmfree_func(void *a, size_t elemsize)

+{ // releases a hash map: a is the raw array pointer (the stbds_hmfree macro passes (p)-1); NULL is accepted

+  if (a == NULL) return;

+  if (stbds_hash_table(a) != NULL) {

+    if (stbds_hash_table(a)->string.mode == STBDS_SH_STRDUP) {

+      size_t i;

+      // skip 0th element, which is default

+      for (i=1; i < stbds_header(a)->length; ++i)

+        STBDS_FREE(NULL, *(char**) ((char *) a + elemsize*i)); // reads the key pointer from offset 0 of each element

+    }

+    stbds_strreset(&stbds_hash_table(a)->string);

+  }

+  STBDS_FREE(NULL, stbds_header(a)->hash_table);

+  STBDS_FREE(NULL, stbds_header(a)); // the array allocation starts at its header

+}

+static ptrdiff_t stbds_hm_find_slot(void *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode)

+{ // returns the slot position holding key, or -1 if an empty slot is reached first; a is the hash-map pointer and must already have a hash table

+  void *raw_a = STBDS_HASH_TO_ARR(a,elemsize);

+  stbds_hash_index *table = stbds_hash_table(raw_a);

+  size_t hash = mode >= STBDS_HM_STRING ? stbds_hash_string((char*)key,table->seed) : stbds_hash_bytes(key, keysize,table->seed);

+  size_t step = STBDS_BUCKET_LENGTH;

+  size_t limit,i;

+  size_t pos;

+  stbds_hash_bucket *bucket;

+  if (hash < 2) hash += 2; // stored hash values are forbidden from being 0, so we can detect empty slots

+  pos = stbds_probe_position(hash, table->slot_count, table->slot_count_log2);

+  for (;;) {

+    STBDS_STATS(++stbds_hash_probes);

+    bucket = &table->storage[pos >> STBDS_BUCKET_SHIFT];

+    // start searching from pos to end of bucket, this should help performance on small hash tables that fit in cache

+    for (i=pos & STBDS_BUCKET_MASK; i < STBDS_BUCKET_LENGTH; ++i) {

+      if (bucket->hash[i] == hash) { // hash matches: confirm by comparing the actual keys

+        if (stbds_is_key_equal(a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) {

+          return (pos & ~STBDS_BUCKET_MASK)+i; // slot position = first slot of this bucket + offset within it

+        }

+      } else if (bucket->hash[i] == STBDS_HASH_EMPTY) {

+        return -1; // an empty slot ends the probe sequence: the key is not present

+      }

+    }

+    // search from beginning of bucket to pos

+    limit = pos & STBDS_BUCKET_MASK;

+    for (i = 0; i < limit; ++i) {

+      if (bucket->hash[i] == hash) {

+        if (stbds_is_key_equal(a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) {

+          return (pos & ~STBDS_BUCKET_MASK)+i;

+        }

+      } else if (bucket->hash[i] == STBDS_HASH_EMPTY) {

+        return -1;

+      }

+    }

+    // quadratic probing

+    pos += step;

+    step += STBDS_BUCKET_LENGTH;

+    pos &= (table->slot_count-1);

+  }

+  /* NOTREACHED */

+}

+void * stbds_hmget_key_ts(void *a, size_t elemsize, void *key, size_t keysize, ptrdiff_t *temp, int mode)

+{ // looks up key and writes its element index (or -1 when absent) to *temp; returns the hash-map pointer, newly allocated if a was NULL

+  size_t keyoffset = 0; // the key is taken to be at the start of each element

+  if (a == NULL) {

+    // make it non-empty so we can return a temp

+    a = stbds_arrgrowf(0, elemsize, 0, 1);

+    stbds_header(a)->length += 1;

+    memset(a, 0, elemsize); // zero the default element

+    *temp = STBDS_INDEX_EMPTY;

+    // adjust a to point after the default element

+    return STBDS_ARR_TO_HASH(a,elemsize);

+  } else {

+    stbds_hash_index *table;

+    void *raw_a = STBDS_HASH_TO_ARR(a,elemsize);

+    // adjust a to point to the default element

+    table = (stbds_hash_index *) stbds_header(raw_a)->hash_table;

+    if (table == 0) {

+      *temp = -1; // no hash index has been built yet, so nothing can be found

+    } else {

+      ptrdiff_t slot = stbds_hm_find_slot(a, elemsize, key, keysize, keyoffset, mode);

+      if (slot < 0) {

+        *temp = STBDS_INDEX_EMPTY;

+      } else {

+        stbds_hash_bucket *b = &table->storage[slot >> STBDS_BUCKET_SHIFT];

+        *temp = b->index[slot & STBDS_BUCKET_MASK]; // translate the slot position into the element index stored there

+      }

+    }

+    return a;

+  }

+}

+void * stbds_hmget_key(void *a, size_t elemsize, void *key, size_t keysize, int mode)

+{ // same lookup as stbds_hmget_key_ts, but the resulting index is stored in the array header's temp field

+  ptrdiff_t temp;

+  void *p = stbds_hmget_key_ts(a, elemsize, key, keysize, &temp, mode);

+  stbds_temp(STBDS_HASH_TO_ARR(p,elemsize)) = temp; // the header belongs to the raw array, one element before p

+  return p;

+}

+void * stbds_hmput_default(void *a, size_t elemsize)

+{ // makes sure the zero-initialized default element exists and returns the hash-map pointer (just past that element)

+  // three cases:

+  //   a is NULL <- allocate

+  //   a has a hash table but no entries, because of shmode <- grow

+  //   a has entries <- do nothing

+  if (a == NULL || stbds_header(STBDS_HASH_TO_ARR(a,elemsize))->length == 0) {

+    a = stbds_arrgrowf(a ? STBDS_HASH_TO_ARR(a,elemsize) : NULL, elemsize, 0, 1);

+    stbds_header(a)->length += 1; // the default element counts as one stored element

+    memset(a, 0, elemsize);

+    a=STBDS_ARR_TO_HASH(a,elemsize);

+  }

+  return a;

+}

+static char *stbds_strdup(char *str);

+void *stbds_hmput_key(void *a, size_t elemsize, void *key, size_t keysize, int mode) // find key, or append a new element for it; the element index is left in stbds_temp

+{ // returns the map pointer, which may differ from a after allocation or growth

+  size_t keyoffset=0;

+  void *raw_a; // holds the hash-view pointer while a is switched to the raw array below

+  stbds_hash_index *table;

+  if (a == NULL) {

+    a = stbds_arrgrowf(0, elemsize, 0, 1);

+    memset(a, 0, elemsize);

+    stbds_header(a)->length += 1;

+    // adjust a to point AFTER the default element

+    a = STBDS_ARR_TO_HASH(a,elemsize);

+  }

+  // adjust a to point to the default element

+  raw_a = a;

+  a = STBDS_HASH_TO_ARR(a,elemsize);

+  table = (stbds_hash_index *) stbds_header(a)->hash_table;

+  if (table == NULL || table->used_count >= table->used_count_threshold) { // no index yet, or it has reached its growth threshold

+    stbds_hash_index *nt;

+    size_t slot_count;

+    slot_count = (table == NULL) ? STBDS_BUCKET_LENGTH : table->slot_count*2; // start with STBDS_BUCKET_LENGTH slots, otherwise double

+    nt = stbds_make_hash_index(slot_count, table);

+    if (table)

+      STBDS_FREE(NULL, table);

+    else

+      nt->string.mode = mode >= STBDS_HM_STRING ? STBDS_SH_DEFAULT : 0; // first index for this map: derive the string mode from the map mode

+    stbds_header(a)->hash_table = table = nt;

+    STBDS_STATS(++stbds_hash_grow);

+  }

+  // we iterate hash table explicitly because we want to track if we saw a tombstone

+  {

+    size_t hash = mode >= STBDS_HM_STRING ? stbds_hash_string((char*)key,table->seed) : stbds_hash_bytes(key, keysize,table->seed);

+    size_t step = STBDS_BUCKET_LENGTH;

+    size_t limit,i;

+    size_t pos;

+    ptrdiff_t tombstone = -1; // slot of the first deleted entry seen while probing, -1 if none

+    stbds_hash_bucket *bucket;

+    // stored hash values are forbidden from being 0, so we can detect empty slots to early out quickly

+    if (hash < 2) hash += 2; // remaps 0 and 1 to 2 and 3 (1 is presumably the deleted marker -- confirm against STBDS_HASH_DELETED)

+    pos = stbds_probe_position(hash, table->slot_count, table->slot_count_log2);

+    for (;;) {

+      STBDS_STATS(++stbds_hash_probes);

+      bucket = &table->storage[pos >> STBDS_BUCKET_SHIFT];

+      // start searching from pos to end of bucket

+      for (i=pos & STBDS_BUCKET_MASK; i < STBDS_BUCKET_LENGTH; ++i) {

+        if (bucket->hash[i] == hash) {

+          if (stbds_is_key_equal(raw_a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) {

+            stbds_temp(a) = bucket->index[i]; // key already present: report its index, nothing is inserted

+            return STBDS_ARR_TO_HASH(a,elemsize);

+          }

+        } else if (bucket->hash[i] == 0) {

+          pos = (pos & ~STBDS_BUCKET_MASK) + i; // remember the exact empty slot

+          goto found_empty_slot;

+        } else if (tombstone < 0) {

+          if (bucket->index[i] == STBDS_INDEX_DELETED)

+            tombstone = (ptrdiff_t) ((pos & ~STBDS_BUCKET_MASK) + i);

+        }

+      }

+      // search from beginning of bucket to pos

+      limit = pos & STBDS_BUCKET_MASK;

+      for (i = 0; i < limit; ++i) {

+        if (bucket->hash[i] == hash) {

+          if (stbds_is_key_equal(raw_a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) {

+            stbds_temp(a) = bucket->index[i]; // same "already present" exit as in the loop above

+            return STBDS_ARR_TO_HASH(a,elemsize);

+          }

+        } else if (bucket->hash[i] == 0) {

+          pos = (pos & ~STBDS_BUCKET_MASK) + i;

+          goto found_empty_slot;

+        } else if (tombstone < 0) {

+          if (bucket->index[i] == STBDS_INDEX_DELETED)

+            tombstone = (ptrdiff_t) ((pos & ~STBDS_BUCKET_MASK) + i);

+        }

+      }

+      // quadratic probing

+      pos += step;

+      step += STBDS_BUCKET_LENGTH;

+      pos &= (table->slot_count-1); // wrap around; relies on slot_count being a power of two -- TODO confirm

+    }

+   found_empty_slot:

+    if (tombstone >= 0) {

+      pos = tombstone; // reuse the deleted slot seen earlier in the probe sequence instead of the empty one

+      --table->tombstone_count;

+    }

+    ++table->used_count;

+    {

+      ptrdiff_t i = (ptrdiff_t) stbds_arrlen(a); // raw length, which counts the default element

+      // we want to do stbds_arraddn(1), but we can't use the macros since we don't have something of the right type

+      if ((size_t) i+1 > stbds_arrcap(a))

+        *(void **) &a = stbds_arrgrowf(a, elemsize, 1, 0);

+      raw_a = STBDS_ARR_TO_HASH(a,elemsize); // recompute: a may have changed when the array grew

+      STBDS_ASSERT((size_t) i+1 <= stbds_arrcap(a));

+      stbds_header(a)->length = i+1;

+      bucket = &table->storage[pos >> STBDS_BUCKET_SHIFT];

+      bucket->hash[pos & STBDS_BUCKET_MASK] = hash;

+      bucket->index[pos & STBDS_BUCKET_MASK] = i-1; // stored index is one less than the raw array index i

+      stbds_temp(a) = i-1;

+      switch (table->string.mode) {

+         case STBDS_SH_STRDUP:  stbds_temp_key(a) = *(char **) ((char *) a + elemsize*i) = stbds_strdup((char*) key); break; // store a private heap copy of the key

+         case STBDS_SH_ARENA:   stbds_temp_key(a) = *(char **) ((char *) a + elemsize*i) = stbds_stralloc(&table->string, (char*)key); break; // store a copy allocated from the table's string arena

+         case STBDS_SH_DEFAULT: stbds_temp_key(a) = *(char **) ((char *) a + elemsize*i) = (char *) key; break; // store the caller's pointer as-is

+         default:                memcpy((char *) a + elemsize*i, key, keysize); break; // any other mode: copy keysize key bytes into the new element

+      }

+    }

+    return STBDS_ARR_TO_HASH(a,elemsize);

+  }

+}

+void * stbds_shmode_func(size_t elemsize, int mode) // create a map holding only its first element, with a hash index whose string mode is preset to mode

+{

+  void *a = stbds_arrgrowf(0, elemsize, 0, 1);

+  stbds_hash_index *h;

+  memset(a, 0, elemsize); // zero the first element

+  stbds_header(a)->length = 1;

+  stbds_header(a)->hash_table = h = (stbds_hash_index *) stbds_make_hash_index(STBDS_BUCKET_LENGTH, NULL); // no previous table to migrate from

+  h->string.mode = (unsigned char) mode;

+  return STBDS_ARR_TO_HASH(a,elemsize);

+}

+void * stbds_hmdel_key(void *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode) // remove key from the map if present; stbds_temp is set to 1 on removal, 0 otherwise

+{ // note: in this function a is the hash-view pointer and raw_a the underlying array (the reverse of the naming used in stbds_hmput_key)

+  if (a == NULL) {

+    return 0;

+  } else {

+    stbds_hash_index *table;

+    void *raw_a = STBDS_HASH_TO_ARR(a,elemsize);

+    table = (stbds_hash_index *) stbds_header(raw_a)->hash_table;

+    stbds_temp(raw_a) = 0; // "nothing deleted" until a matching slot is found

+    if (table == 0) {

+      return a;

+    } else {

+      ptrdiff_t slot;

+      slot = stbds_hm_find_slot(a, elemsize, key, keysize, keyoffset, mode);

+      if (slot < 0)

+        return a;

+      else {

+        stbds_hash_bucket *b = &table->storage[slot >> STBDS_BUCKET_SHIFT];

+        int i = slot & STBDS_BUCKET_MASK;

+        ptrdiff_t old_index = b->index[i]; // index of the element being removed

+        ptrdiff_t final_index = (ptrdiff_t) stbds_arrlen(raw_a)-1-1; // minus one for the raw_a vs a, and minus one for 'last'

+        STBDS_ASSERT(slot < (ptrdiff_t) table->slot_count);

+        --table->used_count;

+        ++table->tombstone_count;

+        stbds_temp(raw_a) = 1; // signal that an element was removed

+        STBDS_ASSERT(table->used_count >= 0);

+        //STBDS_ASSERT(table->tombstone_count < table->slot_count/4);

+        b->hash[i] = STBDS_HASH_DELETED; // leave a tombstone in the vacated slot

+        b->index[i] = STBDS_INDEX_DELETED;

+        if (mode == STBDS_HM_STRING && table->string.mode == STBDS_SH_STRDUP)

+          STBDS_FREE(NULL, *(char**) ((char *) a+elemsize*old_index)); // NOTE(review): reads the key pointer at offset 0 of the element, without keyoffset -- confirm string keys always sit first

+        // if indices are the same, memcpy is a no-op, but back-pointer-fixup will fail, so skip

+        if (old_index != final_index) {

+          // swap delete

+          memmove((char*) a + elemsize*old_index, (char*) a + elemsize*final_index, elemsize);

+          // now find the slot for the last element

+          if (mode == STBDS_HM_STRING)

+            slot = stbds_hm_find_slot(a, elemsize, *(char**) ((char *) a+elemsize*old_index + keyoffset), keysize, keyoffset, mode); // string keys are stored as a pointer, so dereference it

+          else

+            slot = stbds_hm_find_slot(a, elemsize,  (char* ) a+elemsize*old_index + keyoffset, keysize, keyoffset, mode); // other keys are stored inline in the element

+          STBDS_ASSERT(slot >= 0);

+          b = &table->storage[slot >> STBDS_BUCKET_SHIFT];

+          i = slot & STBDS_BUCKET_MASK;

+          STBDS_ASSERT(b->index[i] == final_index);

+          b->index[i] = old_index; // the moved element now lives at old_index

+        }

+        stbds_header(raw_a)->length -= 1;

+        if (table->used_count < table->used_count_shrink_threshold && table->slot_count > STBDS_BUCKET_LENGTH) {

+          stbds_header(raw_a)->hash_table = stbds_make_hash_index(table->slot_count>>1, table); // rebuild the index at half the slot count

+          STBDS_FREE(NULL, table);

+          STBDS_STATS(++stbds_hash_shrink);

+        } else if (table->tombstone_count > table->tombstone_count_threshold) {

+          stbds_header(raw_a)->hash_table = stbds_make_hash_index(table->slot_count   , table); // rebuild at the same size once tombstones pass their threshold

+          STBDS_FREE(NULL, table);

+          STBDS_STATS(++stbds_hash_rebuild);

+        }

+        return a;

+      }

+    }

+  }

+  /* NOTREACHED */

+}

+static char *stbds_strdup(char *str) // return a copy of str (terminator included) in memory obtained from STBDS_REALLOC

+{

+  // to keep replaceable allocator simple, we don't want to use strdup.

+  // rolling our own also avoids problem of strdup vs _strdup

+  size_t len = strlen(str)+1; // +1 so the terminating NUL is copied too

+  char *p = (char*) STBDS_REALLOC(NULL, 0, len);

+  memmove(p, str, len); // NOTE(review): p is used without checking the allocation result

+  return p;

+}

+#ifndef STBDS_STRING_ARENA_BLOCKSIZE_MIN

+#define STBDS_STRING_ARENA_BLOCKSIZE_MIN  512u

+#endif

+#ifndef STBDS_STRING_ARENA_BLOCKSIZE_MAX

+#define STBDS_STRING_ARENA_BLOCKSIZE_MAX  (1u<<20)

+#endif

+char *stbds_stralloc(stbds_string_arena *a, char *str) // copy str (terminator included) into arena a and return a pointer to the copy

+{

+  char *p;

+  size_t len = strlen(str)+1; // bytes needed, including the terminating NUL

+  if (len > a->remaining) { // current block cannot hold the string

+    // compute the next blocksize

+    size_t blocksize = a->block;

+    // size is 512, 512, 1024, 1024, 2048, 2048, 4096, 4096, etc., so that

+    // there are log(SIZE) allocations to free when we destroy the table

+    blocksize = (size_t) (STBDS_STRING_ARENA_BLOCKSIZE_MIN) << (blocksize>>1);

+    // if size is under 1M, advance to next blocktype

+    if (blocksize < (size_t)(STBDS_STRING_ARENA_BLOCKSIZE_MAX))

+      ++a->block;

+    if (len > blocksize) {

+      // if string is larger than blocksize, then just allocate the full size.

+      // note that we still advance string_block so block size will continue

+      // increasing, so e.g. if somebody only calls this with 1000-long strings,

+      // eventually the arena will start doubling and handling those as well

+      stbds_string_block *sb = (stbds_string_block *) STBDS_REALLOC(NULL, 0, sizeof(*sb)-8 + len); // presumably 8 is the declared size of sb->storage -- TODO confirm against stbds_string_block

+      memmove(sb->storage, str, len);

+      if (a->storage) {

+        // insert it after the first element, so that we don't waste the space there

+        sb->next = a->storage->next;

+        a->storage->next = sb;

+      } else {

+        sb->next = 0;

+        a->storage = sb;

+        a->remaining = 0; // this is redundant, but good for clarity

+      }

+      return sb->storage; // the dedicated block holds exactly this string

+    } else {

+      stbds_string_block *sb = (stbds_string_block *) STBDS_REALLOC(NULL, 0, sizeof(*sb)-8 + blocksize);

+      sb->next = a->storage; // new block becomes the head of the list

+      a->storage = sb;

+      a->remaining = blocksize;

+    }

+  }

+  STBDS_ASSERT(len <= a->remaining);

+  p = a->storage->storage + a->remaining - len; // carve the string from the end of the unused part of the head block

+  a->remaining -= len;

+  memmove(p, str, len);

+  return p;

+}

+void stbds_strreset(stbds_string_arena *a) // free every block owned by arena a and zero the arena structure

+{

+  stbds_string_block *x,*y;

+  x = a->storage;

+  while (x) {

+    y = x->next; // read the link before the block is freed

+    STBDS_FREE(NULL, x);

+    x = y;

+  }

+  memset(a, 0, sizeof(*a)); // all fields of the arena go back to zero

+}

+#endif

+//////////////////////////////////////////////////////////////////////////////

+//

+//   UNIT TESTS

+//

+#ifdef STBDS_UNIT_TESTS

+#include <stdio.h>

+#ifdef STBDS_ASSERT_WAS_UNDEFINED

+#undef STBDS_ASSERT

+#endif

+#ifndef STBDS_ASSERT

+#define STBDS_ASSERT assert

+#include <assert.h>

+#endif

+typedef struct { int key,b,c,d; } stbds_struct;

+typedef struct { int key[2],b,c,d; } stbds_struct2;

+static char buffer[256];

+char *strkey(int n) // format "test_<n>" into the file-level static buffer and return it; every call overwrites the previous result

+{

+#if defined(_WIN32) && defined(__STDC_WANT_SECURE_LIB__)

+   sprintf_s(buffer, sizeof(buffer), "test_%d", n);

+#else

+   sprintf(buffer, "test_%d", n);

+#endif

+   return buffer;

+}

+void stbds_unit_tests(void) // self-test: exercises the array, hash-map, string-map and string-arena macros, checking results with STBDS_ASSERT

+{

+#if defined(_MSC_VER) && _MSC_VER <= 1200 && defined(__cplusplus)

+  // VC6 C++ doesn't like the template<> trick on unnamed structures, so do nothing!

+  STBDS_ASSERT(0);

+#else

+  const int testsize = 100000;

+  const int testsize2 = testsize/20;

+  int *arr=NULL;

+  struct { int   key;        int value; }  *intmap  = NULL;

+  struct { char *key;        int value; }  *strmap  = NULL, s;

+  struct { stbds_struct key; int value; }  *map     = NULL;

+  stbds_struct                             *map2    = NULL;

+  stbds_struct2                            *map3    = NULL;

+  stbds_string_arena                        sa      = { 0 };

+  int key3[2] = { 1,2 };

+  ptrdiff_t temp;

+  int i,j;

+  STBDS_ASSERT(arrlen(arr)==0);

+  for (i=0; i < 20000; i += 50) { // push i items, then free, for increasing i

+    for (j=0; j < i; ++j)

+      arrpush(arr,j);

+    arrfree(arr);

+  }

+  for (i=0; i < 4; ++i) { // delete at each position of a 4-element array, with arrdel and arrdelswap

+    arrpush(arr,1); arrpush(arr,2); arrpush(arr,3); arrpush(arr,4);

+    arrdel(arr,i);

+    arrfree(arr);

+    arrpush(arr,1); arrpush(arr,2); arrpush(arr,3); arrpush(arr,4);

+    arrdelswap(arr,i);

+    arrfree(arr);

+  }

+  for (i=0; i < 5; ++i) { // insert 5 at each position, including one past the end

+    arrpush(arr,1); arrpush(arr,2); arrpush(arr,3); arrpush(arr,4);

+    stbds_arrins(arr,i,5);

+    STBDS_ASSERT(arr[i] == 5);

+    if (i < 4)

+      STBDS_ASSERT(arr[4] == 4);

+    arrfree(arr);

+  }

+  i = 1;

+  STBDS_ASSERT(hmgeti(intmap,i) == -1);

+  hmdefault(intmap, -2); // from here on a missing key must read back as -2

+  STBDS_ASSERT(hmgeti(intmap, i) == -1);

+  STBDS_ASSERT(hmget (intmap, i) == -2);

+  for (i=0; i < testsize; i+=2) // insert even keys only

+    hmput(intmap, i, i*5);

+  for (i=0; i < testsize; i+=1) {

+    if (i & 1) STBDS_ASSERT(hmget(intmap, i) == -2 );

+    else       STBDS_ASSERT(hmget(intmap, i) == i*5);

+    if (i & 1) STBDS_ASSERT(hmget_ts(intmap, i, temp) == -2 );

+    else       STBDS_ASSERT(hmget_ts(intmap, i, temp) == i*5);

+  }

+  for (i=0; i < testsize; i+=2) // overwrite the values of the existing keys

+    hmput(intmap, i, i*3);

+  for (i=0; i < testsize; i+=1)

+    if (i & 1) STBDS_ASSERT(hmget(intmap, i) == -2 );

+    else       STBDS_ASSERT(hmget(intmap, i) == i*3);

+  for (i=2; i < testsize; i+=4)

+    hmdel(intmap, i); // delete half the entries

+  for (i=0; i < testsize; i+=1)

+    if (i & 3) STBDS_ASSERT(hmget(intmap, i) == -2 );

+    else       STBDS_ASSERT(hmget(intmap, i) == i*3);

+  for (i=0; i < testsize; i+=1)

+    hmdel(intmap, i); // delete the rest of the entries

+  for (i=0; i < testsize; i+=1)

+    STBDS_ASSERT(hmget(intmap, i) == -2 );

+  hmfree(intmap);

+  for (i=0; i < testsize; i+=2)

+    hmput(intmap, i, i*3);

+  hmfree(intmap);

+  #if defined(__clang__) || defined(__GNUC__)

+  #ifndef __cplusplus

+  intmap = NULL;

+  hmput(intmap, 15, 7); // literal keys, only compiled for clang/GCC in C mode

+  hmput(intmap, 11, 3);

+  hmput(intmap,  9, 5);

+  STBDS_ASSERT(hmget(intmap, 9) == 5);

+  STBDS_ASSERT(hmget(intmap, 11) == 3);

+  STBDS_ASSERT(hmget(intmap, 15) == 7);

+  #endif

+  #endif

+  for (i=0; i < testsize; ++i) // string arena: allocate many short strings, then release them all

+    stralloc(&sa, strkey(i));

+  strreset(&sa);

+  {

+    s.key = "a", s.value = 1;

+    shputs(strmap, s);

+    STBDS_ASSERT(*strmap[0].key == 'a');

+    STBDS_ASSERT(strmap[0].key == s.key); // without sh_new_*: the stored key is the caller's pointer

+    STBDS_ASSERT(strmap[0].value == s.value);

+    shfree(strmap);

+  }

+  {

+    s.key = "a", s.value = 1;

+    sh_new_strdup(strmap);

+    shputs(strmap, s);

+    STBDS_ASSERT(*strmap[0].key == 'a');

+    STBDS_ASSERT(strmap[0].key != s.key); // after sh_new_strdup: the stored key is a different pointer

+    STBDS_ASSERT(strmap[0].value == s.value);

+    shfree(strmap);

+  }

+  {

+    s.key = "a", s.value = 1;

+    sh_new_arena(strmap);

+    shputs(strmap, s);

+    STBDS_ASSERT(*strmap[0].key == 'a');

+    STBDS_ASSERT(strmap[0].key != s.key); // after sh_new_arena: the stored key is a different pointer

+    STBDS_ASSERT(strmap[0].value == s.value);

+    shfree(strmap);

+  }

+  for (j=0; j < 2; ++j) { // same put/get/delete sequence for a strdup map (j==0) and an arena map (j==1)

+    STBDS_ASSERT(shgeti(strmap,"foo") == -1);

+    if (j == 0)

+      sh_new_strdup(strmap);

+    else

+      sh_new_arena(strmap);

+    STBDS_ASSERT(shgeti(strmap,"foo") == -1);

+    shdefault(strmap, -2);

+    STBDS_ASSERT(shgeti(strmap,"foo") == -1);

+    for (i=0; i < testsize; i+=2)

+      shput(strmap, strkey(i), i*3);

+    for (i=0; i < testsize; i+=1)

+      if (i & 1) STBDS_ASSERT(shget(strmap, strkey(i)) == -2 );

+      else       STBDS_ASSERT(shget(strmap, strkey(i)) == i*3);

+    for (i=2; i < testsize; i+=4)

+      shdel(strmap, strkey(i)); // delete half the entries

+    for (i=0; i < testsize; i+=1)

+      if (i & 3) STBDS_ASSERT(shget(strmap, strkey(i)) == -2 );

+      else       STBDS_ASSERT(shget(strmap, strkey(i)) == i*3);

+    for (i=0; i < testsize; i+=1)

+      shdel(strmap, strkey(i)); // delete the rest of the entries

+    for (i=0; i < testsize; i+=1)

+      STBDS_ASSERT(shget(strmap, strkey(i)) == -2 );

+    shfree(strmap);

+  }

+  {

+    struct { char *key; char value; } *hash = NULL;

+    char name[4] = "jen";

+    shput(hash, "bob"   , 'h');

+    shput(hash, "sally" , 'e');

+    shput(hash, "fred"  , 'l');

+    shput(hash, "jen"   , 'x');

+    shput(hash, "doug"  , 'o');

+    shput(hash, name    , 'l'); // same text as the "jen" literal above, supplied from a local array

+    shfree(hash);

+  }

+  for (i=0; i < testsize; i += 2) { // map keyed by a whole stbds_struct value

+    stbds_struct s = { i,i*2,i*3,i*4 };

+    hmput(map, s, i*5);

+  }

+  for (i=0; i < testsize; i += 1) {

+    stbds_struct s = { i,i*2,i*3  ,i*4 };

+    stbds_struct t = { i,i*2,i*3+1,i*4 };

+    if (i & 1) STBDS_ASSERT(hmget(map, s) == 0);

+    else       STBDS_ASSERT(hmget(map, s) == i*5);

+    if (i & 1) STBDS_ASSERT(hmget_ts(map, s, temp) == 0);

+    else       STBDS_ASSERT(hmget_ts(map, s, temp) == i*5);

+    //STBDS_ASSERT(hmget(map, t.key) == 0);

+  }

+  for (i=0; i < testsize; i += 2) { // map2 stores stbds_struct elements directly, looked up by their key field

+    stbds_struct s = { i,i*2,i*3,i*4 };

+    hmputs(map2, s);

+  }

+  hmfree(map);

+  for (i=0; i < testsize; i += 1) {

+    stbds_struct s = { i,i*2,i*3,i*4 };

+    stbds_struct t = { i,i*2,i*3+1,i*4 };

+    if (i & 1) STBDS_ASSERT(hmgets(map2, s.key).d == 0);

+    else       STBDS_ASSERT(hmgets(map2, s.key).d == i*4);

+    //STBDS_ASSERT(hmgetp(map2, t.key) == 0);

+  }

+  hmfree(map2);

+  for (i=0; i < testsize; i += 2) { // map3 elements have a two-int array as key

+    stbds_struct2 s = { { i,i*2 }, i*3,i*4, i*5 };

+    hmputs(map3, s);

+  }

+  for (i=0; i < testsize; i += 1) {

+    stbds_struct2 s = { { i,i*2}, i*3, i*4, i*5 };

+    stbds_struct2 t = { { i,i*2}, i*3+1, i*4, i*5 };

+    if (i & 1) STBDS_ASSERT(hmgets(map3, s.key).d == 0);

+    else       STBDS_ASSERT(hmgets(map3, s.key).d == i*5);

+    //STBDS_ASSERT(hmgetp(map3, t.key) == 0);

+  }

+#endif

+}

+#endif

+/*

+------------------------------------------------------------------------------

+This software is available under 2 licenses -- choose whichever you prefer.

+------------------------------------------------------------------------------

+ALTERNATIVE A - MIT License

+Copyright (c) 2019 Sean Barrett

+Permission is hereby granted, free of charge, to any person obtaining a copy of

+this software and associated documentation files (the "Software"), to deal in

+the Software without restriction, including without limitation the rights to

+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies

+of the Software, and to permit persons to whom the Software is furnished to do

+so, subject to the following conditions:

+The above copyright notice and this permission notice shall be included in all

+copies or substantial portions of the Software.

+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

+SOFTWARE.

+------------------------------------------------------------------------------

+ALTERNATIVE B - Public Domain (www.unlicense.org)

+This is free and unencumbered software released into the public domain.

+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this

+software, either in source code form or as a compiled binary, for any purpose,

+commercial or non-commercial, and by any means.

+In jurisdictions that recognize copyright laws, the author or authors of this

+software dedicate any and all copyright interest in the software to the public

+domain. We make this dedication for the benefit of the public at large and to

+the detriment of our heirs and successors. We intend this dedication to be an

+overt act of relinquishment in perpetuity of all present and future rights to

+this software under copyright law.

+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN

+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION

+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

+------------------------------------------------------------------------------

+*/

--- /dev/null

+++ b/include-demo/stb_image.h

@@ -1,0 +1,8002 @@

+/* stb_image - v2.25 - public domain image loader - http://nothings.org/stb

+								  no warranty implied; use at your own risk

+   Do this:

+	  #define STB_IMAGE_IMPLEMENTATION

+   before you include this file in *one* C or C++ file to create the

+implementation.

+   // i.e. it should look like this:

+   #include ...

+   #include ...

+   #include ...

+   #define STB_IMAGE_IMPLEMENTATION

+   #include "stb_image.h"

+   You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.

+   And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using

+malloc,realloc,free

+   QUICK NOTES:

+	  Primarily of interest to game developers and other people who can

+		  avoid problematic images and only need the trivial interface

+	  JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)

+	  PNG 1/2/4/8/16-bit-per-channel

+	  TGA (not sure what subset, if a subset)

+	  BMP non-1bpp, non-RLE

+	  PSD (composited view only, no extra channels, 8/16 bit-per-channel)

+	  GIF (*comp always reports as 4-channel)

+	  HDR (radiance rgbE format)

+	  PIC (Softimage PIC)

+	  PNM (PPM and PGM binary only)

+	  Animated GIF still needs a proper API, but here's one way to do it:

+		  http://gist.github.com/urraka/685d9a6340b26b830d49

+	  - decode from memory or through FILE (define STBI_NO_STDIO to remove code)

+	  - decode from arbitrary I/O callbacks

+	  - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)

+   Full documentation under "DOCUMENTATION" below.

+LICENSE

+  See end of file for license information.

+RECENT REVISION HISTORY:

+	  2.25  (2020-02-02) fix warnings

+	  2.24  (2020-02-02) fix warnings; thread-local failure_reason and flip_vertically

+	  2.23  (2019-08-11) fix clang static analysis warning

+	  2.22  (2019-03-04) gif fixes, fix warnings

+	  2.21  (2019-02-25) fix typo in comment

+	  2.20  (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs

+	  2.19  (2018-02-11) fix warning

+	  2.18  (2018-01-30) fix warnings

+	  2.17  (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings

+	  2.16  (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes

+	  2.15  (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC

+	  2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs

+	  2.13  (2016-12-04) experimental 16-bit API, only for PNG so far; fixes

+	  2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes

+	  2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64

+						 RGB-format JPEG; remove white matting in PSD;

+						 allocate large structures on the stack;

+						 correct channel count for PNG & BMP

+	  2.10  (2016-01-22) avoid warning introduced in 2.09

+	  2.09  (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED

+   See end of file for full revision history.

+ ============================    Contributors    =========================

+ Image formats                          Extensions, features

+	Sean Barrett (jpeg, png, bmp)          Jetro Lauha (stbi_info)

+	Nicolas Schulz (hdr, psd)              Martin "SpartanJ" Golini (stbi_info)

+	Jonathan Dummer (tga)                  James "moose2000" Brown (iPhone PNG)

+	Jean-Marc Lienher (gif)                Ben "Disch" Wenger (io callbacks)

+	Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG)

+	Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip)

+	Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD)

+	github:urraka (animated gif)           Junggon Kim (PNM comments)

+	Christopher Forseth (animated gif)     Daniel Gibson (16-bit TGA)

+										   socks-the-fox (16-bit PNG)

+										   Jeremy Sawicki (handle all ImageNet JPGs)

+ Optimizations & bugfixes                  Mikhail Morozov (1-bit BMP)

+	Fabian "ryg" Giesen                    Anael Seghezzi (is-16-bit query)

+	Arseny Kapoulkine

+	John-Mark Allen

+	Carmelo J Fdez-Aguera

+ Bug & warning fixes

+	Marc LeBlanc            David Woo          Guillaume George   Martins Mozeiko

+	Christpher Lloyd        Jerry Jansson      Joseph Thomson     Phil Jordan

+	Dave Moore              Roy Eltham         Hayaki Saito       Nathan Reed

+	Won Chun                Luke Graham        Johan Duparc       Nick Verigakis

+	the Horde3D community   Thomas Ruf         Ronny Chevalier    github:rlyeh

+	Janez Zemva             John Bartholomew   Michal Cichon github:romigrou

+	Jonathan Blow           Ken Hamada         Tero Hanninen      github:svdijk

+	Laurent Gomila          Cort Stratton      Sergio Gonzalez    github:snagar

+	Aruelien Pocheville     Thibault Reuille   Cass Everitt       github:Zelex

+	Ryamond Barbiero        Paul Du Bois       Engin Manap        github:grim210

+	Aldo Culquicondor       Philipp Wiesemann  Dale Weiler        github:sammyhw

+	Oriol Ferrer Mesia      Josh Tobin         Matthew Gregan     github:phprus

+	Julian Raschke          Gregory Mullen     Baldur Karlsson    github:poppolopoppo

+	Christian Floisand      Kevin Schmidt      JR Smith           github:darealshinji

+	Brad Weinberger         Matvey Cherevko                       github:Michaelangel007

+	Blazej Dariusz Roszkowski                  Alexander Veselov

+*/

+#ifndef STBI_INCLUDE_STB_IMAGE_H

+#define STBI_INCLUDE_STB_IMAGE_H

+// DOCUMENTATION

+//

+// Limitations:

+//    - no 12-bit-per-channel JPEG

+//    - no JPEGs with arithmetic coding

+//    - GIF always returns *comp=4

+//

+// Basic usage (see HDR discussion below for HDR usage):

+//    int x,y,n;

+//    unsigned char *data = stbi_load(filename, &x, &y, &n, 0);

+//    // ... process data if not NULL ...

+//    // ... x = width, y = height, n = # 8-bit components per pixel ...

+//    // ... replace '0' with '1'..'4' to force that many components per pixel

+//    // ... but 'n' will always be the number that it would have been if you said 0

+//    stbi_image_free(data)

+//

+// Standard parameters:

+//    int *x                 -- outputs image width in pixels

+//    int *y                 -- outputs image height in pixels

+//    int *channels_in_file  -- outputs # of image components in image file

+//    int desired_channels   -- if non-zero, # of image components requested in

+//    result

+//

+// The return value from an image loader is an 'unsigned char *' which points

+// to the pixel data, or NULL on an allocation failure or if the image is

+// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,

+// with each pixel consisting of N interleaved 8-bit components; the first

+// pixel pointed to is top-left-most in the image. There is no padding between

+// image scanlines or between pixels, regardless of format. The number of

+// components N is 'desired_channels' if desired_channels is non-zero, or

+// *channels_in_file otherwise. If desired_channels is non-zero,

+// *channels_in_file has the number of components that _would_ have been

+// output otherwise. E.g. if you set desired_channels to 4, you will always

+// get RGBA output, but you can check *channels_in_file to see if it's trivially

+// opaque because e.g. there were only 3 channels in the source image.

+//

+// An output image with N components has the following components interleaved

+// in this order in each pixel:

+//

+//     N=#comp     components

+//       1           grey

+//       2           grey, alpha

+//       3           red, green, blue

+//       4           red, green, blue, alpha

+//

+// If image loading fails for any reason, the return value will be NULL,

+// and *x, *y, *channels_in_file will be unchanged. The function

+// stbi_failure_reason() can be queried for an extremely brief, end-user

+// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS

+// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get

+// slightly more user-friendly ones.

+//

+// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.

+//

+// ===========================================================================

+//

+// UNICODE:

+//

+//   If compiling for Windows and you wish to use Unicode filenames, compile

+//   with

+//       #define STBI_WINDOWS_UTF8

+//   and pass utf8-encoded filenames. Call stbi_convert_wchar_to_utf8 to convert

+//   Windows wchar_t filenames to utf8.

+//

+// ===========================================================================

+//

+// Philosophy

+//

+// stb libraries are designed with the following priorities:

+//

+//    1. easy to use

+//    2. easy to maintain

+//    3. good performance

+//

+// Sometimes I let "good performance" creep up in priority over "easy to

+// maintain", and for best performance I may provide less-easy-to-use APIs that

+// give higher performance, in addition to the easy-to-use ones. Nevertheless,

+// it's important to keep in mind that from the standpoint of you, a client of

+// this library, all you care about is #1 and #3, and stb libraries DO NOT

+// emphasize #3 above all.

+//

+// Some secondary priorities arise directly from the first two, some of which

+// provide more explicit reasons why performance can't be emphasized.

+//

+//    - Portable ("ease of use")

+//    - Small source code footprint ("easy to maintain")

+//    - No dependencies ("ease of use")

+//

+// ===========================================================================

+//

+// I/O callbacks

+//

+// I/O callbacks allow you to read from arbitrary sources, like packaged

+// files or some other source. Data read from callbacks are processed

+// through a small internal buffer (currently 128 bytes) to try to reduce

+// overhead.

+//

+// The three functions you must define are "read" (reads some bytes of data),

+// "skip" (skips some bytes of data), "eof" (reports if the stream is at the

+// end).

+//

+// ===========================================================================

+//

+// SIMD support

+//

+// The JPEG decoder will try to automatically use SIMD kernels on x86 when

+// supported by the compiler. For ARM Neon support, you must explicitly

+// request it.

+//

+// (The old do-it-yourself SIMD API is no longer supported in the current

+// code.)

+//

+// On x86, SSE2 will automatically be used when available based on a run-time

+// test; if not, the generic C versions are used as a fall-back. On ARM targets,

+// the typical path is to have separate builds for NEON and non-NEON devices

+// (at least this is true for iOS and Android). Therefore, the NEON support is

+// toggled by a build flag: define STBI_NEON to get NEON loops.

+//

+// If for some reason you do not want to use any of SIMD code, or if

+// you have issues compiling it, you can disable it entirely by

+// defining STBI_NO_SIMD.

+//

+// ===========================================================================

+//

+// HDR image support   (disable by defining STBI_NO_HDR)

+//

+// stb_image supports loading HDR images in general, and currently the Radiance

+// .HDR file format specifically. You can still load any file through the

+// existing interface; if you attempt to load an HDR file, it will be

+// automatically remapped to LDR, assuming gamma 2.2 and an arbitrary scale

+// factor defaulting to 1; both of these constants can be reconfigured through

+// this interface:

+//

+//     stbi_hdr_to_ldr_gamma(2.2f);

+//     stbi_hdr_to_ldr_scale(1.0f);

+//

+// (note, do not use _inverse_ constants; stb_image will invert them

+// appropriately).

+//

+// Additionally, there is a new, parallel interface for loading files as

+// (linear) floats to preserve the full dynamic range:

+//

+//    float *data = stbi_loadf(filename, &x, &y, &n, 0);

+//

+// If you load LDR images through this interface, those images will

+// be promoted to floating point values, run through the inverse of

+// constants corresponding to the above:

+//

+//     stbi_ldr_to_hdr_scale(1.0f);

+//     stbi_ldr_to_hdr_gamma(2.2f);

+//

+// Finally, given a filename (or an open file or memory block--see header

+// file for details) containing image data, you can query for the "most

+// appropriate" interface to use (that is, whether the image is HDR or

+// not), using:

+//

+//     stbi_is_hdr(char *filename);

+//

+// ===========================================================================

+//

+// iPhone PNG support:

+//

+// By default we convert iphone-formatted PNGs back to RGB, even though

+// they are internally encoded differently. You can disable this conversion

+// by calling stbi_convert_iphone_png_to_rgb(0), in which case

+// you will always just get the native iphone "format" through (which

+// is BGR stored in RGB).

+//

+// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per

+// pixel to remove any premultiplied alpha *only* if the image file explicitly

+// says there's premultiplied data (currently only happens in iPhone images,

+// and only if iPhone convert-to-rgb processing is on).

+//

+// ===========================================================================

+//

+// ADDITIONAL CONFIGURATION

+//

+//  - You can suppress implementation of any of the decoders to reduce

+//    your code footprint by #defining one or more of the following

+//    symbols before creating the implementation.

+//

+//        STBI_NO_JPEG

+//        STBI_NO_PNG

+//        STBI_NO_BMP

+//        STBI_NO_PSD

+//        STBI_NO_TGA

+//        STBI_NO_GIF

+//        STBI_NO_HDR

+//        STBI_NO_PIC

+//        STBI_NO_PNM   (.ppm and .pgm)

+//

+//  - You can request *only* certain decoders and suppress all other ones

+//    (this will be more forward-compatible, as addition of new decoders

+//    doesn't require you to disable them explicitly):

+//

+//        STBI_ONLY_JPEG

+//        STBI_ONLY_PNG

+//        STBI_ONLY_BMP

+//        STBI_ONLY_PSD

+//        STBI_ONLY_TGA

+//        STBI_ONLY_GIF

+//        STBI_ONLY_HDR

+//        STBI_ONLY_PIC

+//        STBI_ONLY_PNM   (.ppm and .pgm)

+//

+//   - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still

+//     want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB

+//

+#ifndef STBI_NO_STDIO

+#include <stdio.h>

+#endif // STBI_NO_STDIO

+#define STBI_VERSION 1

+// Symbolic values for the 'desired_channels' argument of the load functions.
+// NOTE(review): each nonzero value appears to equal the number of components
+// per pixel that its name describes -- confirm against the loaders.
+enum {
+	STBI_default = 0, // only used for desired_channels
+	STBI_grey = 1,
+	STBI_grey_alpha = 2,
+	STBI_rgb = 3,
+	STBI_rgb_alpha = 4
+};

+#include <stdlib.h>

+typedef unsigned char stbi_uc;

+typedef unsigned short stbi_us;

+#ifdef __cplusplus

+extern "C" {

+#endif

+#ifndef STBIDEF

+#ifdef STB_IMAGE_STATIC

+#define STBIDEF static

+#else

+#define STBIDEF extern

+#endif

+#endif

+//////////////////////////////////////////////////////////////////////////////

+//

+// PRIMARY API - works on images of any type

+//

+//

+// load image by filename, open file, or memory buffer

+//

+// Table of user-supplied I/O functions taken by the *_from_callbacks entry
+// points. Every function receives the opaque 'user' pointer that the caller
+// passed alongside the table.
+typedef struct {
+	// fill 'data' with 'size' bytes; return the number of bytes actually read
+	int (*read)(void* user, char* data, int size);
+	// skip the next 'n' bytes, or 'unget' the last -n bytes if 'n' is negative
+	void (*skip)(void* user, int n);
+	// return nonzero if we are at end of file/data
+	int (*eof)(void* user);
+} stbi_io_callbacks;

+////////////////////////////////////

+//

+// 8-bits-per-channel interface

+//

+STBIDEF stbi_uc* stbi_load_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* channels_in_file, int desired_channels);

+STBIDEF stbi_uc* stbi_load_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* channels_in_file, int desired_channels);

+#ifndef STBI_NO_STDIO

+STBIDEF stbi_uc* stbi_load(char const* filename, int* x, int* y, int* channels_in_file, int desired_channels);

+STBIDEF stbi_uc* stbi_load_from_file(FILE* f, int* x, int* y, int* channels_in_file, int desired_channels);

+// for stbi_load_from_file, file pointer is left pointing immediately after

+// image

+#endif

+#ifndef STBI_NO_GIF

+STBIDEF stbi_uc* stbi_load_gif_from_memory(stbi_uc const* buffer, int len, int** delays, int* x, int* y, int* z, int* comp, int req_comp);

+#endif

+#ifdef STBI_WINDOWS_UTF8

+STBIDEF int stbi_convert_wchar_to_utf8(char* buffer, size_t bufferlen, const wchar_t* input);

+#endif

+////////////////////////////////////

+//

+// 16-bits-per-channel interface

+//

+STBIDEF stbi_us* stbi_load_16_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* channels_in_file, int desired_channels);

+STBIDEF stbi_us* stbi_load_16_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* channels_in_file, int desired_channels);

+#ifndef STBI_NO_STDIO

+STBIDEF stbi_us* stbi_load_16(char const* filename, int* x, int* y, int* channels_in_file, int desired_channels);

+STBIDEF stbi_us* stbi_load_from_file_16(FILE* f, int* x, int* y, int* channels_in_file, int desired_channels);

+#endif

+////////////////////////////////////

+//

+// float-per-channel interface

+//

+#ifndef STBI_NO_LINEAR

+STBIDEF float* stbi_loadf_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* channels_in_file, int desired_channels);

+STBIDEF float* stbi_loadf_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* channels_in_file, int desired_channels);

+#ifndef STBI_NO_STDIO

+STBIDEF float* stbi_loadf(char const* filename, int* x, int* y, int* channels_in_file, int desired_channels);

+STBIDEF float* stbi_loadf_from_file(FILE* f, int* x, int* y, int* channels_in_file, int desired_channels);

+#endif

+#endif

+#ifndef STBI_NO_HDR

+STBIDEF void stbi_hdr_to_ldr_gamma(float gamma);

+STBIDEF void stbi_hdr_to_ldr_scale(float scale);

+#endif // STBI_NO_HDR

+#ifndef STBI_NO_LINEAR

+STBIDEF void stbi_ldr_to_hdr_gamma(float gamma);

+STBIDEF void stbi_ldr_to_hdr_scale(float scale);

+#endif // STBI_NO_LINEAR

+// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR

+STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const* clbk, void* user);

+STBIDEF int stbi_is_hdr_from_memory(stbi_uc const* buffer, int len);

+#ifndef STBI_NO_STDIO

+STBIDEF int stbi_is_hdr(char const* filename);

+STBIDEF int stbi_is_hdr_from_file(FILE* f);

+#endif // STBI_NO_STDIO

+// get a VERY brief reason for failure

+// on most compilers (and ALL modern mainstream compilers) this is threadsafe

+STBIDEF const char* stbi_failure_reason(void);

+// free the loaded image -- this is just free()

+STBIDEF void stbi_image_free(void* retval_from_stbi_load);

+// get image dimensions & components without fully decoding

+STBIDEF int stbi_info_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* comp);

+STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* comp);

+STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const* buffer, int len);

+STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const* clbk, void* user);

+#ifndef STBI_NO_STDIO

+STBIDEF int stbi_info(char const* filename, int* x, int* y, int* comp);

+STBIDEF int stbi_info_from_file(FILE* f, int* x, int* y, int* comp);

+STBIDEF int stbi_is_16_bit(char const* filename);

+STBIDEF int stbi_is_16_bit_from_file(FILE* f);

+#endif

+// for image formats that explicitly notate that they have premultiplied alpha,

+// we just return the colors as stored in the file. set this flag to force

+// unpremultiplication. results are undefined if the unpremultiply overflows.

+STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);

+// indicate whether we should process iphone images back to canonical format,

+// or just pass them through "as-is"

+STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);

+// flip the image vertically, so the first pixel in the output array is the

+// bottom left

+STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);

+// as above, but only applies to images loaded on the thread that calls the

+// function. this function is only available if your compiler supports

+// thread-local variables; calling it will fail to link if your compiler doesn't

+STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip);

+// ZLIB client - used by PNG, available for other purposes

+STBIDEF char* stbi_zlib_decode_malloc_guesssize(const char* buffer, int len, int initial_size, int* outlen);

+STBIDEF char* stbi_zlib_decode_malloc_guesssize_headerflag(const char* buffer, int len, int initial_size, int* outlen, int parse_header);

+STBIDEF char* stbi_zlib_decode_malloc(const char* buffer, int len, int* outlen);

+STBIDEF int stbi_zlib_decode_buffer(char* obuffer, int olen, const char* ibuffer, int ilen);

+STBIDEF char* stbi_zlib_decode_noheader_malloc(const char* buffer, int len, int* outlen);

+STBIDEF int stbi_zlib_decode_noheader_buffer(char* obuffer, int olen, const char* ibuffer, int ilen);

+#ifdef __cplusplus

+}

+#endif

+//

+//

+////   end header file   /////////////////////////////////////////////////////

+#endif // STBI_INCLUDE_STB_IMAGE_H

+#ifdef STB_IMAGE_IMPLEMENTATION

+#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) ||                         \

+	defined(STBI_ONLY_PSD) || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) || defined(STBI_ONLY_ZLIB)

+#ifndef STBI_ONLY_JPEG

+#define STBI_NO_JPEG

+#endif

+#ifndef STBI_ONLY_PNG

+#define STBI_NO_PNG

+#endif

+#ifndef STBI_ONLY_BMP

+#define STBI_NO_BMP

+#endif

+#ifndef STBI_ONLY_PSD

+#define STBI_NO_PSD

+#endif

+#ifndef STBI_ONLY_TGA

+#define STBI_NO_TGA

+#endif

+#ifndef STBI_ONLY_GIF

+#define STBI_NO_GIF

+#endif

+#ifndef STBI_ONLY_HDR

+#define STBI_NO_HDR

+#endif

+#ifndef STBI_ONLY_PIC

+#define STBI_NO_PIC

+#endif

+#ifndef STBI_ONLY_PNM

+#define STBI_NO_PNM

+#endif

+#endif

+#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)

+#define STBI_NO_ZLIB

+#endif

+#include <limits.h>

+#include <stdarg.h>

+#include <stddef.h> // ptrdiff_t on osx

+#include <stdlib.h>

+#include <string.h>

+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)

+#include <math.h> // ldexp, pow

+#endif

+#ifndef STBI_NO_STDIO

+#include <stdio.h>

+#endif

+#ifndef STBI_ASSERT

+#include <assert.h>

+#define STBI_ASSERT(x) assert(x)

+#endif

+#ifdef __cplusplus

+#define STBI_EXTERN extern "C"

+#else

+#define STBI_EXTERN extern

+#endif

+#ifndef _MSC_VER

+#ifdef __cplusplus

+#define stbi_inline inline

+#else

+#define stbi_inline

+#endif

+#else

+#define stbi_inline __forceinline

+#endif

+#ifndef STBI_NO_THREAD_LOCALS

+#if defined(__cplusplus) && __cplusplus >= 201103L

+#define STBI_THREAD_LOCAL thread_local

+#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L

+#define STBI_THREAD_LOCAL _Thread_local

+#elif defined(__GNUC__)

+#define STBI_THREAD_LOCAL __thread

+#elif defined(_MSC_VER)

+#define STBI_THREAD_LOCAL __declspec(thread)

+#endif

+#endif

+#ifdef _MSC_VER

+typedef unsigned short stbi__uint16;

+typedef signed short stbi__int16;

+typedef unsigned int stbi__uint32;

+typedef signed int stbi__int32;

+#else

+#include <stdint.h>

+typedef uint16_t stbi__uint16;

+typedef int16_t stbi__int16;

+typedef uint32_t stbi__uint32;

+typedef int32_t stbi__int32;

+#endif

+// should produce compiler error if size is wrong

+typedef unsigned char validate_uint32[sizeof(stbi__uint32) == 4 ? 1 : -1];

+#ifdef _MSC_VER

+#define STBI_NOTUSED(v) (void)(v)

+#else

+#define STBI_NOTUSED(v) (void)sizeof(v)

+#endif

+#ifdef _MSC_VER

+#define STBI_HAS_LROTL

+#endif

+#ifdef STBI_HAS_LROTL

+#define stbi_lrot(x, y) _lrotl(x, y)

+#else

+#define stbi_lrot(x, y) (((x) << (y)) | ((x) >> (32 - (y))))

+#endif

+#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))

+// ok

+#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)

+// ok

+#else

+#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."

+#endif

+#ifndef STBI_MALLOC

+#define STBI_MALLOC(sz) malloc(sz)

+#define STBI_REALLOC(p, newsz) realloc(p, newsz)

+#define STBI_FREE(p) free(p)

+#endif

+#ifndef STBI_REALLOC_SIZED

+#define STBI_REALLOC_SIZED(p, oldsz, newsz) STBI_REALLOC(p, newsz)

+#endif

+// x86/x64 detection

+#if defined(__x86_64__) || defined(_M_X64)

+#define STBI__X64_TARGET

+#elif defined(__i386) || defined(_M_IX86)

+#define STBI__X86_TARGET

+#endif

+#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)

+// gcc doesn't support sse2 intrinsics unless you compile with -msse2,

+// which in turn means it gets to use SSE2 everywhere. This is unfortunate,

+// but previous attempts to provide the SSE2 functions with runtime

+// detection caused numerous issues. The way architecture extensions are

+// exposed in GCC/Clang is, sadly, not really suited for one-file libs.

+// New behavior: if compiled with -msse2, we use SSE2 without any

+// detection; if not, we don't use it at all.

+#define STBI_NO_SIMD

+#endif

+#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)

+// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid

+// STBI__X64_TARGET

+//

+// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the

+// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.

+// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not

+// simultaneously enabling "-mstackrealign".

+//

+// See https://github.com/nothings/stb/issues/81 for more information.

+//

+// So default to no SSE2 on 32-bit MinGW. If you've read this far and added

+// -mstackrealign to your build settings, feel free to #define

+// STBI_MINGW_ENABLE_SSE2.

+#define STBI_NO_SIMD

+#endif

+#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))

+#define STBI_SSE2

+#include <emmintrin.h>

+#ifdef _MSC_VER

+#if _MSC_VER >= 1400 // not VC6

+#include <intrin.h>  // __cpuid

+static int stbi__cpuid3(void) {

+	int info[4];

+	__cpuid(info, 1);

+	return info[3];

+}

+#else

+static int stbi__cpuid3(void) {

+	int res;

+	__asm {

+      mov  eax,1

+      cpuid

+      mov  res,edx

+	}

+	return res;

+}

+#endif

+#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name

+#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)

+// Returns 1 when bit 26 of the value reported by stbi__cpuid3() is set,
+// and 0 otherwise.
+static int stbi__sse2_available(void) {
+	const int sse2_bit = 26;
+	int features = stbi__cpuid3();
+	if ((features >> sse2_bit) & 1)
+		return 1;
+	return 0;
+}

+#endif

+#else // assume GCC-style if not VC++

+#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))

+#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)

+static int stbi__sse2_available(void) {

+	// If we're even attempting to compile this on GCC/Clang, that means

+	// -msse2 is on, which means the compiler is allowed to use SSE2

+	// instructions at will, and so are we.

+	return 1;

+}

+#endif

+#endif

+#endif

+// ARM NEON

+#if defined(STBI_NO_SIMD) && defined(STBI_NEON)

+#undef STBI_NEON

+#endif

+#ifdef STBI_NEON

+#include <arm_neon.h>

+// assume GCC or Clang on ARM targets

+#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))

+#endif

+#ifndef STBI_SIMD_ALIGN

+#define STBI_SIMD_ALIGN(type, name) type name

+#endif

+///////////////////////////////////////////////

+//

+//  stbi__context struct and start_xxx functions

+// stbi__context structure is our basic context used by all images, so it

+// contains all the IO context, plus some basic image information

+typedef struct {
+	stbi__uint32 img_x, img_y; // NOTE(review): presumably image width/height -- confirm against the decoders
+	int img_n, img_out_n;	   // NOTE(review): presumably channel counts (file / output) -- confirm
+	stbi_io_callbacks io;	   // read/skip/eof callbacks, copied in by stbi__start_callbacks
+	void* io_user_data;		   // opaque pointer given to stbi__start_callbacks
+	int read_from_callbacks;   // 1 when started by stbi__start_callbacks, 0 by stbi__start_mem
+	int buflen;				   // set to sizeof(buffer_start) by stbi__start_callbacks
+	stbi_uc buffer_start[128]; // internal buffer that backs the data window for callback I/O
+	// current data window; stbi__start_mem points it at [buffer, buffer + len)
+	stbi_uc *img_buffer, *img_buffer_end;
+	// initial data window, restored by stbi__rewind
+	stbi_uc *img_buffer_original, *img_buffer_original_end;
+} stbi__context;

+static void stbi__refill_buffer(stbi__context* s);

+// Set up 's' to decode straight from the caller's memory block
+// [buffer, buffer + len): no read callback, and both the current and the
+// "original" data windows cover the whole block.
+static void stbi__start_mem(stbi__context* s, stbi_uc const* buffer, int len) {
+	stbi_uc* first = (stbi_uc*)buffer;
+	stbi_uc* past_last = first + len;
+	s->io.read = NULL;
+	s->read_from_callbacks = 0;
+	s->img_buffer_original = first;
+	s->img_buffer = first;
+	s->img_buffer_original_end = past_last;
+	s->img_buffer_end = past_last;
+}

+// initialize a callback-based context
+// Copies the callback table '*c' and the opaque 'user' pointer into 's',
+// points the context at its internal buffer_start array, and performs the
+// first fill through stbi__refill_buffer.
+static void stbi__start_callbacks(stbi__context* s, stbi_io_callbacks* c, void* user) {
+	s->io = *c;
+	s->io_user_data = user;
+	s->buflen = sizeof(s->buffer_start); // capacity of the internal buffer
+	s->read_from_callbacks = 1;
+	s->img_buffer_original = s->buffer_start;
+	stbi__refill_buffer(s); // defined elsewhere; presumably sets img_buffer / img_buffer_end -- confirm
+	// remember where the initially loaded data ends; stbi__rewind restores it
+	s->img_buffer_original_end = s->img_buffer_end;
+}

+#ifndef STBI_NO_STDIO

+// 'read' callback for stdio streams: reads up to 'size' bytes from the FILE*
+// passed as 'user' into 'data' and returns how many bytes fread delivered.
+static int stbi__stdio_read(void* user, char* data, int size) {
+	FILE* stream = (FILE*)user;
+	size_t got = fread(data, 1, size, stream);
+	return (int)got;
+}

+// 'skip' callback for stdio streams: moves the FILE* passed as 'user' by
+// 'n' bytes relative to its current position (negative 'n' moves backwards).
+static void stbi__stdio_skip(void* user, int n) {
+	FILE* stream = (FILE*)user;
+	fseek(stream, n, SEEK_CUR);
+}

+// 'eof' callback for stdio streams. Returns nonzero when the FILE* passed as
+// 'user' is at end of file, and also when its error indicator is set, so a
+// stream that can no longer be read is reported as exhausted rather than as
+// still having data.
+static int stbi__stdio_eof(void* user) {
+	FILE* stream = (FILE*)user;
+	return feof(stream) || ferror(stream);
+}

+static stbi_io_callbacks stbi__stdio_callbacks = {

+	stbi__stdio_read,

+	stbi__stdio_skip,

+	stbi__stdio_eof,

+};

+static void stbi__start_file(stbi__context* s, FILE* f) { stbi__start_callbacks(s, &stbi__stdio_callbacks, (void*)f); }

+// static void stop_file(stbi__context *s) { }

+#endif // !STBI_NO_STDIO

+// Restore the data window to the initial one recorded when the context was
+// started. This is not a true rewind to the start of the stream: it only
+// returns to the beginning of the initial buffer, which is enough because it
+// is only used after a format 'test', and those look at no more than 92
+// bytes.
+static void stbi__rewind(stbi__context* s) {
+	s->img_buffer_end = s->img_buffer_original_end;
+	s->img_buffer = s->img_buffer_original;
+}

+enum { STBI_ORDER_RGB, STBI_ORDER_BGR };

+typedef struct {

+	int bits_per_channel;

+	int num_channels;

+	int channel_order;

+} stbi__result_info;

+#ifndef STBI_NO_JPEG

+static int stbi__jpeg_test(stbi__context* s);

+static void* stbi__jpeg_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);

+static int stbi__jpeg_info(stbi__context* s, int* x, int* y, int* comp);

+#endif

+#ifndef STBI_NO_PNG

+static int stbi__png_test(stbi__context* s);

+static void* stbi__png_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);

+static int stbi__png_info(stbi__context* s, int* x, int* y, int* comp);

+static int stbi__png_is16(stbi__context* s);

+#endif

+#ifndef STBI_NO_BMP

+static int stbi__bmp_test(stbi__context* s);

+static void* stbi__bmp_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);

+static int stbi__bmp_info(stbi__context* s, int* x, int* y, int* comp);

+#endif

+#ifndef STBI_NO_TGA

+static int stbi__tga_test(stbi__context* s);

+static void* stbi__tga_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);

+static int stbi__tga_info(stbi__context* s, int* x, int* y, int* comp);

+#endif

+#ifndef STBI_NO_PSD

+static int stbi__psd_test(stbi__context* s);

+static void* stbi__psd_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri, int bpc);

+static int stbi__psd_info(stbi__context* s, int* x, int* y, int* comp);

+static int stbi__psd_is16(stbi__context* s);

+#endif

+#ifndef STBI_NO_HDR

+static int stbi__hdr_test(stbi__context* s);

+static float* stbi__hdr_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);

+static int stbi__hdr_info(stbi__context* s, int* x, int* y, int* comp);

+#endif

+#ifndef STBI_NO_PIC

+static int stbi__pic_test(stbi__context* s);

+static void* stbi__pic_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);

+static int stbi__pic_info(stbi__context* s, int* x, int* y, int* comp);

+#endif

+#ifndef STBI_NO_GIF

+static int stbi__gif_test(stbi__context* s);

+static void* stbi__gif_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);

+static void* stbi__load_gif_main(stbi__context* s, int** delays, int* x, int* y, int* z, int* comp, int req_comp);

+static int stbi__gif_info(stbi__context* s, int* x, int* y, int* comp);

+#endif

+#ifndef STBI_NO_PNM

+static int stbi__pnm_test(stbi__context* s);

+static void* stbi__pnm_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);

+static int stbi__pnm_info(stbi__context* s, int* x, int* y, int* comp);

+#endif

+static

+#ifdef STBI_THREAD_LOCAL

+	STBI_THREAD_LOCAL

+#endif

+	const char* stbi__g_failure_reason;

+STBIDEF const char* stbi_failure_reason(void) { return stbi__g_failure_reason; }

+#ifndef STBI_NO_FAILURE_STRINGS

+// Store 'str' as the current failure reason (the value handed back by
+// stbi_failure_reason) and always return 0.
+static int stbi__err(const char* str) {
+	const int failure = 0;
+	stbi__g_failure_reason = str;
+	return failure;
+}

+#endif

+static void* stbi__malloc(size_t size) { return STBI_MALLOC(size); }

+// stb_image uses ints pervasively, including for offset calculations.

+// therefore the largest decoded image size we can support with the

+// current code, even on 64-bit targets, is INT_MAX. this is not a

+// significant limitation for the intended use case.

+//

+// we do, however, need to make sure our size calculations don't

+// overflow. hence a few helper functions for size calculations that

+// multiply integers together, making sure that they're non-negative

+// and no overflow occurs.

+// Returns 1 when "a + b" does not exceed INT_MAX, 0 otherwise.
+// A negative 'b' is rejected outright.
+static int stbi__addsizes_valid(int a, int b) {
+	// Once 0 <= b <= INT_MAX is known, INT_MAX - b cannot overflow, so
+	// comparing 'a' against it is an overflow-free way of testing
+	// "a + b <= INT_MAX".
+	return (b >= 0) && (a <= INT_MAX - b);
+}

+// Returns 1 when "a * b" can be formed without overflowing int, 0 otherwise.
+// Negative factors are treated as invalid.
+static int stbi__mul2sizes_valid(int a, int b) {
+	if (a >= 0 && b >= 0) {
+		// multiplying by zero is always safe; testing it first also keeps
+		// the division below from dividing by zero
+		return (b == 0) || (a <= INT_MAX / b);
+	}
+	return 0;
+}

+#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)

+// Returns 1 when "a*b + add" can be computed with no negative term or factor
+// and no overflow, 0 otherwise. The product is only formed after it has been
+// checked.
+static int stbi__mad2sizes_valid(int a, int b, int add) {
+	if (!stbi__mul2sizes_valid(a, b))
+		return 0;
+	return stbi__addsizes_valid(a * b, add) != 0;
+}

+#endif

+// Returns 1 when "a*b*c + add" can be computed with no negative term or
+// factor and no overflow, 0 otherwise. Each partial product is validated
+// before it is formed.
+static int stbi__mad3sizes_valid(int a, int b, int c, int add) {
+	if (!stbi__mul2sizes_valid(a, b))
+		return 0;
+	if (!stbi__mul2sizes_valid(a * b, c))
+		return 0;
+	return stbi__addsizes_valid(a * b * c, add) != 0;
+}

+// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't

+// overflow

+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)

+static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add) {
+	// validate every partial product before forming it, then the final sum
+	if (!stbi__mul2sizes_valid(a, b))
+		return 0;
+	if (!stbi__mul2sizes_valid(a * b, c))
+		return 0;
+	if (!stbi__mul2sizes_valid(a * b * c, d))
+		return 0;
+	return stbi__addsizes_valid(a * b * c * d, add) != 0;
+}

+#endif

+#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)

+// mallocs with size overflow checking

+// Allocate "a*b + add" bytes, or return NULL without allocating when that
+// size fails the stbi__mad2sizes_valid check.
+static void* stbi__malloc_mad2(int a, int b, int add) {
+	return stbi__mad2sizes_valid(a, b, add) ? stbi__malloc(a * b + add) : NULL;
+}

+#endif

+// Allocate "a*b*c + add" bytes, or return NULL without allocating when that
+// size fails the stbi__mad3sizes_valid check.
+static void* stbi__malloc_mad3(int a, int b, int c, int add) {
+	return stbi__mad3sizes_valid(a, b, c, add) ? stbi__malloc(a * b * c + add) : NULL;
+}

+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)

+// Allocate "a*b*c*d + add" bytes, or return NULL without allocating when
+// that size fails the stbi__mad4sizes_valid check.
+static void* stbi__malloc_mad4(int a, int b, int c, int d, int add) {
+	return stbi__mad4sizes_valid(a, b, c, d, add) ? stbi__malloc(a * b * c * d + add) : NULL;
+}

+#endif

+// stbi__err - error

+// stbi__errpf - error returning pointer to float

+// stbi__errpuc - error returning pointer to unsigned char

+#ifdef STBI_NO_FAILURE_STRINGS

+#define stbi__err(x, y) 0

+#elif defined(STBI_FAILURE_USERMSG)

+#define stbi__err(x, y) stbi__err(y)

+#else

+#define stbi__err(x, y) stbi__err(x)

+#endif

+#define stbi__errpf(x, y) ((float*)(size_t)(stbi__err(x, y) ? NULL : NULL))

+#define stbi__errpuc(x, y) ((unsigned char*)(size_t)(stbi__err(x, y) ? NULL : NULL))

+STBIDEF void stbi_image_free(void* retval_from_stbi_load) { STBI_FREE(retval_from_stbi_load); }

+#ifndef STBI_NO_LINEAR

+static float* stbi__ldr_to_hdr(stbi_uc* data, int x, int y, int comp);

+#endif

+#ifndef STBI_NO_HDR

+static stbi_uc* stbi__hdr_to_ldr(float* data, int x, int y, int comp);

+#endif

+static int stbi__vertically_flip_on_load_global = 0;

+STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip) { stbi__vertically_flip_on_load_global = flag_true_if_should_flip; }

+#ifndef STBI_THREAD_LOCAL

+#define stbi__vertically_flip_on_load stbi__vertically_flip_on_load_global

+#else

+static STBI_THREAD_LOCAL int stbi__vertically_flip_on_load_local, stbi__vertically_flip_on_load_set;

+STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip) {

+	stbi__vertically_flip_on_load_local = flag_true_if_should_flip;

+	stbi__vertically_flip_on_load_set = 1;

+}

+#define stbi__vertically_flip_on_load (stbi__vertically_flip_on_load_set ? stbi__vertically_flip_on_load_local : stbi__vertically_flip_on_load_global)

+#endif // STBI_THREAD_LOCAL

+// Central format dispatcher: fills '*ri' with defaults, then tries each
+// compiled-in decoder's test function in a fixed order (JPEG, PNG, BMP, GIF,
+// PSD, PIC, PNM, HDR, TGA) and returns the result of the first loader whose
+// test succeeds.
+//   s           - input context to probe and decode from
+//   x, y, comp  - forwarded to the selected loader
+//   req_comp    - forwarded to the selected loader; when 0 the HDR path
+//                 uses *comp as the channel count for its LDR conversion
+//   ri          - initialized here (8 bits per channel, RGB order, 0 channels)
+//                 and then passed to the loader
+//   bpc         - forwarded to the PSD loader only
+// If no test succeeds, the value of
+// stbi__errpuc("unknown image type", ...) is returned.
+static void* stbi__load_main(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri, int bpc) {
+	memset(ri, 0,
+		   sizeof(*ri));				// make sure it's initialized if we add new fields
+	ri->bits_per_channel = 8;			// default is 8 so most paths don't have to be changed
+	ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here
+										// so we can add BGR order
+	ri->num_channels = 0;
+#ifndef STBI_NO_JPEG
+	if (stbi__jpeg_test(s))
+		return stbi__jpeg_load(s, x, y, comp, req_comp, ri);
+#endif
+#ifndef STBI_NO_PNG
+	if (stbi__png_test(s))
+		return stbi__png_load(s, x, y, comp, req_comp, ri);
+#endif
+#ifndef STBI_NO_BMP
+	if (stbi__bmp_test(s))
+		return stbi__bmp_load(s, x, y, comp, req_comp, ri);
+#endif
+#ifndef STBI_NO_GIF
+	if (stbi__gif_test(s))
+		return stbi__gif_load(s, x, y, comp, req_comp, ri);
+#endif
+#ifndef STBI_NO_PSD
+	if (stbi__psd_test(s))
+		return stbi__psd_load(s, x, y, comp, req_comp, ri, bpc);
+#else
+	// PSD is the only consumer of 'bpc'; mark it used when PSD is compiled out
+	STBI_NOTUSED(bpc);
+#endif
+#ifndef STBI_NO_PIC
+	if (stbi__pic_test(s))
+		return stbi__pic_load(s, x, y, comp, req_comp, ri);
+#endif
+#ifndef STBI_NO_PNM
+	if (stbi__pnm_test(s))
+		return stbi__pnm_load(s, x, y, comp, req_comp, ri);
+#endif
+#ifndef STBI_NO_HDR
+	if (stbi__hdr_test(s)) {
+		// HDR data is decoded as floats first, then converted with
+		// stbi__hdr_to_ldr before being returned through this interface
+		float* hdr = stbi__hdr_load(s, x, y, comp, req_comp, ri);
+		return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
+	}
+#endif
+#ifndef STBI_NO_TGA
+	// test tga last because it's a crappy test!
+	if (stbi__tga_test(s))
+		return stbi__tga_load(s, x, y, comp, req_comp, ri);
+#endif
+	return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
+}

+// Convert a 16-bit-per-channel image to 8 bits per channel by keeping the
+// high byte of every sample.
+//   orig     - w*h*channels 16-bit samples; this function takes ownership
+//   returns  - newly allocated buffer of w*h*channels 8-bit samples, or the
+//              value of stbi__errpuc("outofmem", ...) if allocation fails
+// 'orig' is released on both the success and the failure path, so the caller
+// must not free it after this call.
+static stbi_uc* stbi__convert_16_to_8(stbi__uint16* orig, int w, int h, int channels) {
+	int i;
+	int img_len = w * h * channels;
+	stbi_uc* reduced;
+	reduced = (stbi_uc*)stbi__malloc(img_len);
+	if (reduced == NULL) {
+		// callers overwrite their only pointer to 'orig' with our return
+		// value, so it has to be freed here or it leaks
+		STBI_FREE(orig);
+		return stbi__errpuc("outofmem", "Out of memory");
+	}
+	for (i = 0; i < img_len; ++i)
+		reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top byte of each 16-bit sample is a
+													   // sufficient approx of 16->8 bit scaling
+	STBI_FREE(orig);
+	return reduced;
+}

+// Convert an 8-bit-per-channel image to 16 bits per channel by replicating
+// each byte into the high and low byte of the 16-bit sample.
+//   orig     - w*h*channels 8-bit samples; this function takes ownership
+//   returns  - newly allocated buffer of w*h*channels 16-bit samples, or the
+//              value of stbi__errpuc("outofmem", ...) if allocation fails
+// 'orig' is released on both the success and the failure path, so the caller
+// must not free it after this call.
+static stbi__uint16* stbi__convert_8_to_16(stbi_uc* orig, int w, int h, int channels) {
+	int i;
+	int img_len = w * h * channels;
+	stbi__uint16* enlarged;
+	enlarged = (stbi__uint16*)stbi__malloc(img_len * 2);
+	if (enlarged == NULL) {
+		// callers overwrite their only pointer to 'orig' with our return
+		// value, so it has to be freed here or it leaks
+		STBI_FREE(orig);
+		return (stbi__uint16*)stbi__errpuc("outofmem", "Out of memory");
+	}
+	for (i = 0; i < img_len; ++i)
+		enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
+	STBI_FREE(orig);
+	return enlarged;
+}

+// Flip an image upside down in place by swapping row k with row h-1-k.
+//   image            - pixel data, h rows of w*bytes_per_pixel bytes each
+//   w, h             - dimensions in pixels
+//   bytes_per_pixel  - size of one pixel in bytes
+// Rows are exchanged through a fixed 2048-byte scratch buffer, one chunk at
+// a time, so no heap allocation is needed.
+static void stbi__vertical_flip(void* image, int w, int h, int bytes_per_pixel) {
+	stbi_uc scratch[2048];
+	stbi_uc* base = (stbi_uc*)image;
+	size_t stride = (size_t)w * bytes_per_pixel;
+	int half = h >> 1;
+	int top;
+	for (top = 0; top < half; ++top) {
+		stbi_uc* upper = base + top * stride;
+		stbi_uc* lower = base + (h - top - 1) * stride;
+		size_t remaining;
+		// exchange the two rows chunk by chunk
+		for (remaining = stride; remaining != 0;) {
+			size_t chunk = remaining;
+			if (chunk > sizeof(scratch))
+				chunk = sizeof(scratch);
+			memcpy(scratch, upper, chunk);
+			memcpy(upper, lower, chunk);
+			memcpy(lower, scratch, chunk);
+			upper += chunk;
+			lower += chunk;
+			remaining -= chunk;
+		}
+	}
+}

+#ifndef STBI_NO_GIF

+// Vertically flip each of 'z' consecutive w-by-h images stored back to back
+// in 'image', one slice at a time.
+static void stbi__vertical_flip_slices(void* image, int w, int h, int z, int bytes_per_pixel) {
+	int remaining;
+	int bytes_per_slice = w * h * bytes_per_pixel;
+	stbi_uc* cursor = (stbi_uc*)image;
+	for (remaining = z; remaining > 0; --remaining) {
+		stbi__vertical_flip(cursor, w, h, bytes_per_pixel);
+		cursor += bytes_per_slice;
+	}
+}

+#endif

+// Decode an image through stbi__load_main and return it as 8-bit samples.
+//   s           - input context
+//   x, y, comp  - outputs filled in by the loader
+//   req_comp    - requested channel count, or 0 to use *comp
+// A 16-bit result is reduced to 8 bits, and the image is flipped vertically
+// when the flip-on-load setting is enabled.
+// Returns NULL if loading fails or if the 16->8 conversion fails.
+static unsigned char* stbi__load_and_postprocess_8bit(stbi__context* s, int* x, int* y, int* comp, int req_comp) {
+	stbi__result_info ri;
+	void* result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);
+	if (result == NULL)
+		return NULL;
+	if (ri.bits_per_channel != 8) {
+		STBI_ASSERT(ri.bits_per_channel == 16);
+		result = stbi__convert_16_to_8((stbi__uint16*)result, *x, *y, req_comp == 0 ? *comp : req_comp);
+		// the conversion allocates and can fail; a NULL result must not
+		// reach the flip below
+		if (result == NULL)
+			return NULL;
+		ri.bits_per_channel = 8;
+	}
+	// @TODO: move stbi__convert_format to here
+	if (stbi__vertically_flip_on_load) {
+		int channels = req_comp ? req_comp : *comp;
+		stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
+	}
+	return (unsigned char*)result;
+}

+// Decode an image through stbi__load_main and return it as 16-bit samples.
+//   s           - input context
+//   x, y, comp  - outputs filled in by the loader
+//   req_comp    - requested channel count, or 0 to use *comp
+// An 8-bit result is widened to 16 bits, and the image is flipped vertically
+// when the flip-on-load setting is enabled.
+// Returns NULL if loading fails or if the 8->16 conversion fails.
+static stbi__uint16* stbi__load_and_postprocess_16bit(stbi__context* s, int* x, int* y, int* comp, int req_comp) {
+	stbi__result_info ri;
+	void* result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
+	if (result == NULL)
+		return NULL;
+	if (ri.bits_per_channel != 16) {
+		STBI_ASSERT(ri.bits_per_channel == 8);
+		result = stbi__convert_8_to_16((stbi_uc*)result, *x, *y, req_comp == 0 ? *comp : req_comp);
+		// the conversion allocates and can fail; a NULL result must not
+		// reach the flip below
+		if (result == NULL)
+			return NULL;
+		ri.bits_per_channel = 16;
+	}
+	// @TODO: move stbi__convert_format16 to here
+	// @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to
+	// keep more precision
+	if (stbi__vertically_flip_on_load) {
+		int channels = req_comp ? req_comp : *comp;
+		stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
+	}
+	return (stbi__uint16*)result;
+}

+#if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR)

+// Apply the flip-on-load setting to a float image: when flipping is enabled
+// and 'result' is non-NULL, the image is flipped vertically in place, using
+// req_comp (or *comp when req_comp is 0) floats per pixel.
+static void stbi__float_postprocess(float* result, int* x, int* y, int* comp, int req_comp) {
+	int channels;
+	if (!stbi__vertically_flip_on_load || result == NULL)
+		return;
+	if (req_comp)
+		channels = req_comp;
+	else
+		channels = *comp;
+	stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
+}

+#endif

+#ifndef STBI_NO_STDIO

+#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)

+STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char* str, int cbmb, wchar_t* widestr,

+																	int cchwide);

+STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t* widestr, int cchwide, char* str,

+																	int cbmb, const char* defchar, int* used_default);

+#endif

+#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)

+// Convert the wide string `input` to UTF-8, writing at most `bufferlen` bytes
+// into `buffer`. Returns the value reported by WideCharToMultiByte.
+STBIDEF int stbi_convert_wchar_to_utf8(char* buffer, size_t bufferlen, const wchar_t* input) {
+	const int capacity = (int)bufferlen;
+	return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, capacity, NULL, NULL);
+}

+#endif

+// Open `filename` with the fopen-style `mode`; returns 0 (NULL) on failure.
+// On MSVC with STBI_WINDOWS_UTF8 both strings are converted from UTF-8 to wide
+// strings and opened with the wide-character API; otherwise the secure or the
+// plain fopen is used depending on the compiler version.
+static FILE* stbi__fopen(char const* filename, char const* mode) {
+	FILE* f;
+#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)
+	wchar_t wMode[64];
+	wchar_t wFilename[1024];
+	// MultiByteToWideChar expects the destination capacity in wide characters,
+	// not bytes; sizeof(array) alone would overstate it by sizeof(wchar_t).
+	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, (int)(sizeof(wFilename) / sizeof(*wFilename))))
+		return 0;
+	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, (int)(sizeof(wMode) / sizeof(*wMode))))
+		return 0;
+#if _MSC_VER >= 1400
+	if (0 != _wfopen_s(&f, wFilename, wMode))
+		f = 0;
+#else
+	f = _wfopen(wFilename, wMode);
+#endif
+#elif defined(_MSC_VER) && _MSC_VER >= 1400
+	if (0 != fopen_s(&f, filename, mode))
+		f = 0;
+#else
+	f = fopen(filename, mode);
+#endif
+	return f;
+}

+// Open `filename` in binary mode and decode it as an 8-bit-per-channel image.
+// Returns the pixel buffer, or the stbi__errpuc failure value when the file
+// cannot be opened. The file is closed before returning.
+STBIDEF stbi_uc* stbi_load(char const* filename, int* x, int* y, int* comp, int req_comp) {
+	unsigned char* pixels;
+	FILE* fp = stbi__fopen(filename, "rb");
+	if (fp == NULL)
+		return stbi__errpuc("can't fopen", "Unable to open file");
+	pixels = stbi_load_from_file(fp, x, y, comp, req_comp);
+	fclose(fp);
+	return pixels;
+}

+// Decode an 8-bit-per-channel image from an already open stream. On success
+// the stream is rewound by the number of bytes still sitting unread in the
+// decoder's buffer, so those bytes are not lost to the caller.
+STBIDEF stbi_uc* stbi_load_from_file(FILE* f, int* x, int* y, int* comp, int req_comp) {
+	stbi__context ctx;
+	unsigned char* pixels;
+	stbi__start_file(&ctx, f);
+	pixels = stbi__load_and_postprocess_8bit(&ctx, x, y, comp, req_comp);
+	if (pixels) {
+		// give back the bytes that were buffered but never consumed
+		int unread = (int)(ctx.img_buffer_end - ctx.img_buffer);
+		fseek(f, -unread, SEEK_CUR);
+	}
+	return pixels;
+}

+// Decode a 16-bit-per-channel image from an already open stream. On success
+// the stream is rewound by the number of bytes still sitting unread in the
+// decoder's buffer, so those bytes are not lost to the caller.
+STBIDEF stbi__uint16* stbi_load_from_file_16(FILE* f, int* x, int* y, int* comp, int req_comp) {
+	stbi__context ctx;
+	stbi__uint16* pixels;
+	stbi__start_file(&ctx, f);
+	pixels = stbi__load_and_postprocess_16bit(&ctx, x, y, comp, req_comp);
+	if (pixels) {
+		// give back the bytes that were buffered but never consumed
+		int unread = (int)(ctx.img_buffer_end - ctx.img_buffer);
+		fseek(f, -unread, SEEK_CUR);
+	}
+	return pixels;
+}

+// Open `filename` in binary mode and decode it as a 16-bit-per-channel image.
+// Returns the pixel buffer, or the stbi__errpuc failure value when the file
+// cannot be opened. The file is closed before returning.
+STBIDEF stbi_us* stbi_load_16(char const* filename, int* x, int* y, int* comp, int req_comp) {
+	stbi__uint16* pixels;
+	FILE* fp = stbi__fopen(filename, "rb");
+	if (fp == NULL)
+		return (stbi_us*)stbi__errpuc("can't fopen", "Unable to open file");
+	pixels = stbi_load_from_file_16(fp, x, y, comp, req_comp);
+	fclose(fp);
+	return pixels;
+}

+#endif //! STBI_NO_STDIO

+// Decode a 16-bit-per-channel image from the `len` bytes at `buffer`.
+STBIDEF stbi_us* stbi_load_16_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* channels_in_file, int desired_channels) {
+	stbi__context ctx;
+	stbi_us* pixels;
+	stbi__start_mem(&ctx, buffer, len);
+	pixels = stbi__load_and_postprocess_16bit(&ctx, x, y, channels_in_file, desired_channels);
+	return pixels;
+}

+// Decode a 16-bit-per-channel image whose bytes are supplied by the
+// user-provided I/O callbacks `clbk` (called with `user`).
+STBIDEF stbi_us* stbi_load_16_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* channels_in_file, int desired_channels) {
+	stbi__context ctx;
+	stbi_us* pixels;
+	stbi__start_callbacks(&ctx, (stbi_io_callbacks*)clbk, user);
+	pixels = stbi__load_and_postprocess_16bit(&ctx, x, y, channels_in_file, desired_channels);
+	return pixels;
+}

+// Decode an 8-bit-per-channel image from the `len` bytes at `buffer`.
+STBIDEF stbi_uc* stbi_load_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* comp, int req_comp) {
+	stbi__context ctx;
+	stbi_uc* pixels;
+	stbi__start_mem(&ctx, buffer, len);
+	pixels = stbi__load_and_postprocess_8bit(&ctx, x, y, comp, req_comp);
+	return pixels;
+}

+// Decode an 8-bit-per-channel image whose bytes are supplied by the
+// user-provided I/O callbacks `clbk` (called with `user`).
+STBIDEF stbi_uc* stbi_load_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* comp, int req_comp) {
+	stbi__context ctx;
+	stbi_uc* pixels;
+	stbi__start_callbacks(&ctx, (stbi_io_callbacks*)clbk, user);
+	pixels = stbi__load_and_postprocess_8bit(&ctx, x, y, comp, req_comp);
+	return pixels;
+}

+#ifndef STBI_NO_GIF

+// Decode every frame of a GIF held in the `len` bytes at `buffer`.
+// The frames are returned back to back in one buffer; *z receives the frame
+// count and *delays the per-frame delays (both filled in by
+// stbi__load_gif_main). Returns NULL when decoding fails.
+STBIDEF stbi_uc* stbi_load_gif_from_memory(stbi_uc const* buffer, int len, int** delays, int* x, int* y, int* z, int* comp, int req_comp) {
+	unsigned char* result;
+	stbi__context s;
+	stbi__start_mem(&s, buffer, len);
+	result = (unsigned char*)stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp);
+	// Only flip a successfully decoded animation: on failure there is no buffer
+	// and the output dimensions may never have been written.
+	if (result != NULL && stbi__vertically_flip_on_load) {
+		// NOTE(review): assumes, as the other loaders in this file do, that the
+		// returned pixels have req_comp channels whenever req_comp is non-zero;
+		// confirm against stbi__load_gif_main.
+		int channels = req_comp ? req_comp : *comp;
+		stbi__vertical_flip_slices(result, *x, *y, *z, channels);
+	}
+	return result;
+}

+#endif

+#ifndef STBI_NO_LINEAR

+// Load an image as floats. HDR input (when HDR support is compiled in) is
+// decoded directly and post-processed; anything else is loaded as 8-bit data
+// and converted with stbi__ldr_to_hdr. Returns the stbi__errpf failure value
+// when the 8-bit load fails.
+static float* stbi__loadf_main(stbi__context* s, int* x, int* y, int* comp, int req_comp) {
+	unsigned char* ldr;
+#ifndef STBI_NO_HDR
+	if (stbi__hdr_test(s)) {
+		stbi__result_info info;
+		float* hdr = stbi__hdr_load(s, x, y, comp, req_comp, &info);
+		if (hdr != NULL)
+			stbi__float_postprocess(hdr, x, y, comp, req_comp);
+		return hdr;
+	}
+#endif
+	ldr = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
+	if (ldr == NULL)
+		return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
+	return stbi__ldr_to_hdr(ldr, *x, *y, req_comp ? req_comp : *comp);
+}

+// Decode a float image from the `len` bytes at `buffer`.
+STBIDEF float* stbi_loadf_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* comp, int req_comp) {
+	stbi__context ctx;
+	float* pixels;
+	stbi__start_mem(&ctx, buffer, len);
+	pixels = stbi__loadf_main(&ctx, x, y, comp, req_comp);
+	return pixels;
+}

+// Decode a float image whose bytes are supplied by the user-provided I/O
+// callbacks `clbk` (called with `user`).
+STBIDEF float* stbi_loadf_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* comp, int req_comp) {
+	stbi__context ctx;
+	float* pixels;
+	stbi__start_callbacks(&ctx, (stbi_io_callbacks*)clbk, user);
+	pixels = stbi__loadf_main(&ctx, x, y, comp, req_comp);
+	return pixels;
+}

+#ifndef STBI_NO_STDIO

+// Open `filename` in binary mode and decode it as a float image. Returns the
+// stbi__errpf failure value when the file cannot be opened. The file is
+// closed before returning.
+STBIDEF float* stbi_loadf(char const* filename, int* x, int* y, int* comp, int req_comp) {
+	float* pixels;
+	FILE* fp = stbi__fopen(filename, "rb");
+	if (fp == NULL)
+		return stbi__errpf("can't fopen", "Unable to open file");
+	pixels = stbi_loadf_from_file(fp, x, y, comp, req_comp);
+	fclose(fp);
+	return pixels;
+}

+// Decode a float image from an already open stream.
+STBIDEF float* stbi_loadf_from_file(FILE* f, int* x, int* y, int* comp, int req_comp) {
+	stbi__context ctx;
+	float* pixels;
+	stbi__start_file(&ctx, f);
+	pixels = stbi__loadf_main(&ctx, x, y, comp, req_comp);
+	return pixels;
+}

+#endif // !STBI_NO_STDIO

+#endif // !STBI_NO_LINEAR

+// these is-hdr-or-not functions are defined regardless of whether

+// STBI_NO_LINEAR is defined, for API simplicity; if STBI_NO_HDR is defined,

+// they always report false!

+// Report whether the `len` bytes at `buffer` pass the HDR format test.
+// Always 0 when HDR support is compiled out.
+STBIDEF int stbi_is_hdr_from_memory(stbi_uc const* buffer, int len) {
+#ifdef STBI_NO_HDR
+	STBI_NOTUSED(buffer);
+	STBI_NOTUSED(len);
+	return 0;
+#else
+	stbi__context ctx;
+	stbi__start_mem(&ctx, buffer, len);
+	return stbi__hdr_test(&ctx);
+#endif
+}

+#ifndef STBI_NO_STDIO

+// Report whether the file at `filename` passes the HDR format test.
+// Returns 0 when the file cannot be opened.
+STBIDEF int stbi_is_hdr(char const* filename) {
+	int is_hdr;
+	FILE* fp = stbi__fopen(filename, "rb");
+	if (!fp)
+		return 0;
+	is_hdr = stbi_is_hdr_from_file(fp);
+	fclose(fp);
+	return is_hdr;
+}

+// Report whether the stream `f` passes the HDR format test, then seek back to
+// the position the stream had on entry. Always 0 when HDR support is
+// compiled out.
+STBIDEF int stbi_is_hdr_from_file(FILE* f) {
+#ifdef STBI_NO_HDR
+	STBI_NOTUSED(f);
+	return 0;
+#else
+	stbi__context ctx;
+	int is_hdr;
+	long start = ftell(f); // remembered so the probe leaves the stream where it was
+	stbi__start_file(&ctx, f);
+	is_hdr = stbi__hdr_test(&ctx);
+	fseek(f, start, SEEK_SET);
+	return is_hdr;
+#endif
+}

+#endif // !STBI_NO_STDIO

+// Report whether the data supplied by the I/O callbacks `clbk` (called with
+// `user`) passes the HDR format test. Always 0 when HDR support is compiled out.
+STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const* clbk, void* user) {
+#ifdef STBI_NO_HDR
+	STBI_NOTUSED(clbk);
+	STBI_NOTUSED(user);
+	return 0;
+#else
+	stbi__context ctx;
+	stbi__start_callbacks(&ctx, (stbi_io_callbacks*)clbk, user);
+	return stbi__hdr_test(&ctx);
+#endif
+}

+#ifndef STBI_NO_LINEAR

+// Exponent and multiplier applied to colour channels when 8-bit data is
+// converted to float (see stbi__ldr_to_hdr).
+static float stbi__l2h_gamma = 2.2f;
+static float stbi__l2h_scale = 1.0f;
+// Set the exponent used for the 8-bit to float conversion.
+STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) {
+	stbi__l2h_gamma = gamma;
+}
+// Set the multiplier used for the 8-bit to float conversion.
+STBIDEF void stbi_ldr_to_hdr_scale(float scale) {
+	stbi__l2h_scale = scale;
+}

+#endif

+// Inverse exponent and inverse multiplier applied to colour channels when
+// float data is converted to 8 bits (see stbi__hdr_to_ldr).
+static float stbi__h2l_gamma_i = 1.0f / 2.2f;
+static float stbi__h2l_scale_i = 1.0f;
+// Set the gamma for the float to 8-bit conversion; its reciprocal is stored.
+STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) {
+	stbi__h2l_gamma_i = 1 / gamma;
+}
+// Set the scale for the float to 8-bit conversion; its reciprocal is stored.
+STBIDEF void stbi_hdr_to_ldr_scale(float scale) {
+	stbi__h2l_scale_i = 1 / scale;
+}

+//////////////////////////////////////////////////////////////////////////////

+//

+// Common code used by all image loaders

+//

+enum { STBI__SCAN_load = 0, STBI__SCAN_type, STBI__SCAN_header }; // NOTE(review): presumably selects how far a loader parses (full load / type check / header only) -- confirm at the use sites

+// Refill the context's buffer through the read callback and reset the read
+// cursor to the start of the buffer.
+static void stbi__refill_buffer(stbi__context* s) {
+	int got = (s->io.read)(s->io_user_data, (char*)s->buffer_start, s->buflen);
+	s->img_buffer = s->buffer_start;
+	if (got != 0) {
+		s->img_buffer_end = s->buffer_start + got;
+		return;
+	}
+	// at end of file, treat same as if from memory, but need to handle case
+	// where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file:
+	// leave a single zero byte in the buffer
+	s->read_from_callbacks = 0;
+	s->img_buffer_end = s->buffer_start + 1;
+	*s->img_buffer = 0;
+}

+// Return the next input byte, refilling the buffer through the callbacks when
+// it is exhausted. Returns 0 once the input has run out and no callbacks
+// remain to refill from.
+stbi_inline static stbi_uc stbi__get8(stbi__context* s) {
+	if (s->img_buffer >= s->img_buffer_end) {
+		if (!s->read_from_callbacks)
+			return 0;
+		stbi__refill_buffer(s);
+	}
+	return *s->img_buffer++;
+}

+#if defined(STBI_NO_JPEG) && defined(STBI_NO_HDR) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)

+// nothing

+#else

+// Non-zero when no more input is available: for callback-driven input the eof
+// callback must report end of file first; after that (and for memory input)
+// it depends on whether the buffer has been fully consumed.
+stbi_inline static int stbi__at_eof(stbi__context* s) {
+	if (s->io.read) {
+		int source_done = (s->io.eof)(s->io_user_data);
+		if (!source_done)
+			return 0;
+		// if feof() is true, check if buffer = end
+		// special case: we've only got the special 0 character at the end
+		if (!s->read_from_callbacks)
+			return 1;
+	}
+	return s->img_buffer >= s->img_buffer_end;
+}

+#endif

+#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) &&           \

+	defined(STBI_NO_PIC)

+// nothing

+#else

+// Skip `n` input bytes. A negative `n` jumps to the end of the buffered data.
+// With callback input, whatever is not already buffered is skipped through
+// the skip callback.
+static void stbi__skip(stbi__context* s, int n) {
+	if (n < 0) {
+		s->img_buffer = s->img_buffer_end;
+		return;
+	}
+	if (s->io.read) {
+		int buffered = (int)(s->img_buffer_end - s->img_buffer);
+		if (n > buffered) {
+			// consume the whole buffer, then let the source skip the rest
+			s->img_buffer = s->img_buffer_end;
+			(s->io.skip)(s->io_user_data, n - buffered);
+			return;
+		}
+	}
+	s->img_buffer += n;
+}

+#endif

+#if defined(STBI_NO_PNG) && defined(STBI_NO_TGA) && defined(STBI_NO_HDR) && defined(STBI_NO_PNM)

+// nothing

+#else

+// Copy the next `n` input bytes into `buffer`. Returns 1 when all `n` bytes
+// were obtained and 0 otherwise. With callback input, bytes beyond what is
+// buffered are read straight from the read callback.
+static int stbi__getn(stbi__context* s, stbi_uc* buffer, int n) {
+	if (s->io.read) {
+		int buffered = (int)(s->img_buffer_end - s->img_buffer);
+		if (buffered < n) {
+			int wanted = n - buffered;
+			int got;
+			memcpy(buffer, s->img_buffer, buffered);
+			got = (s->io.read)(s->io_user_data, (char*)buffer + buffered, wanted);
+			s->img_buffer = s->img_buffer_end;
+			return got == wanted;
+		}
+	}
+	if (s->img_buffer + n > s->img_buffer_end)
+		return 0;
+	memcpy(buffer, s->img_buffer, n);
+	s->img_buffer += n;
+	return 1;
+}

+#endif

+#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)

+// nothing

+#else

+// Read a 16-bit big-endian value (most significant byte first).
+static int stbi__get16be(stbi__context* s) {
+	int hi = stbi__get8(s);
+	int lo = stbi__get8(s);
+	return (hi << 8) + lo;
+}

+#endif

+#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)

+// nothing

+#else

+// Read a 32-bit big-endian value (most significant half first).
+static stbi__uint32 stbi__get32be(stbi__context* s) {
+	stbi__uint32 hi = stbi__get16be(s);
+	stbi__uint32 lo = stbi__get16be(s);
+	return (hi << 16) + lo;
+}

+#endif

+#if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)

+// nothing

+#else

+// Read a 16-bit little-endian value (least significant byte first).
+static int stbi__get16le(stbi__context* s) {
+	int lo = stbi__get8(s);
+	int hi = stbi__get8(s);
+	return lo + (hi << 8);
+}

+#endif

+#ifndef STBI_NO_BMP

+// Read a 32-bit little-endian value (least significant half first).
+static stbi__uint32 stbi__get32le(stbi__context* s) {
+	stbi__uint32 z = stbi__get16le(s);
+	// Widen before shifting: stbi__get16le returns int, and shifting a high
+	// half >= 0x8000 left by 16 as a signed int overflows (undefined behavior).
+	z += (stbi__uint32)stbi__get16le(s) << 16;
+	return z;
+}

+#endif

+#define STBI__BYTECAST(x) ((stbi_uc)((x)&255)) // truncate int to byte without warnings

+#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) &&           \

+	defined(STBI_NO_PIC) && defined(STBI_NO_PNM)

+// nothing

+#else

+//////////////////////////////////////////////////////////////////////////////

+//

+//  generic converter from built-in img_n to req_comp

+//    individual types do this automatically as much as possible (e.g. jpeg

+//    does all cases internally since it needs to colorspace convert anyway,

+//    and it never has alpha, so very few cases ). png can automatically

+//    interleave an alpha=255 channel, but falls back to this for other cases

+//

+//  assume data buffer is malloced, so malloc a new one and free that one

+//  only failure mode is malloc failing

+// Integer luma of an RGB triple: weights 77, 150 and 29 sum to 256, hence
+// the final shift by 8.
+static stbi_uc stbi__compute_y(int r, int g, int b) {
+	int weighted = (r * 77) + (g * 150) + (29 * b);
+	return (stbi_uc)(weighted >> 8);
+}

+#endif

+#if defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) &&            \

+	defined(STBI_NO_PNM)

+// nothing

+#else

+// Convert `data` (x * y pixels, img_n 8-bit channels) into a newly allocated
+// buffer with req_comp channels. When req_comp == img_n, `data` itself is
+// returned. Otherwise `data` is always freed: on success the new buffer is
+// returned, on allocation failure or an unsupported channel pair the
+// stbi__errpuc failure value is returned.
+static unsigned char* stbi__convert_format(unsigned char* data, int img_n, int req_comp, unsigned int x, unsigned int y) {
+	int i, j;
+	unsigned char* good;
+	if (req_comp == img_n)
+		return data;
+	STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
+	good = (unsigned char*)stbi__malloc_mad3(req_comp, x, y, 0);
+	if (good == NULL) {
+		STBI_FREE(data);
+		return stbi__errpuc("outofmem", "Out of memory");
+	}
+	for (j = 0; j < (int)y; ++j) {
+		unsigned char* src = data + j * x * img_n;
+		unsigned char* dest = good + j * x * req_comp;
+#define STBI__COMBO(a, b) ((a)*8 + (b))
+#define STBI__CASE(a, b) \
+	case STBI__COMBO(a, b): \
+		for (i = x - 1; i >= 0; --i, src += a, dest += b)
+		// convert source image with img_n components to one with req_comp
+		// components; avoid switch per pixel, so use switch per scanline and
+		// massive macros
+		switch (STBI__COMBO(img_n, req_comp)) {
+			STBI__CASE(1, 2) {
+				dest[0] = src[0];
+				dest[1] = 255;
+			}
+			break;
+			STBI__CASE(1, 3) { dest[0] = dest[1] = dest[2] = src[0]; }
+			break;
+			STBI__CASE(1, 4) {
+				dest[0] = dest[1] = dest[2] = src[0];
+				dest[3] = 255;
+			}
+			break;
+			STBI__CASE(2, 1) { dest[0] = src[0]; }
+			break;
+			STBI__CASE(2, 3) { dest[0] = dest[1] = dest[2] = src[0]; }
+			break;
+			STBI__CASE(2, 4) {
+				dest[0] = dest[1] = dest[2] = src[0];
+				dest[3] = src[1];
+			}
+			break;
+			STBI__CASE(3, 4) {
+				dest[0] = src[0];
+				dest[1] = src[1];
+				dest[2] = src[2];
+				dest[3] = 255;
+			}
+			break;
+			STBI__CASE(3, 1) { dest[0] = stbi__compute_y(src[0], src[1], src[2]); }
+			break;
+			STBI__CASE(3, 2) {
+				dest[0] = stbi__compute_y(src[0], src[1], src[2]);
+				dest[1] = 255;
+			}
+			break;
+			STBI__CASE(4, 1) { dest[0] = stbi__compute_y(src[0], src[1], src[2]); }
+			break;
+			STBI__CASE(4, 2) {
+				dest[0] = stbi__compute_y(src[0], src[1], src[2]);
+				dest[1] = src[3];
+			}
+			break;
+			STBI__CASE(4, 3) {
+				dest[0] = src[0];
+				dest[1] = src[1];
+				dest[2] = src[2];
+			}
+			break;
+		default:
+			// Unsupported channel pair. With asserts compiled out the loop used to
+			// fall through and hand back uninitialized pixels; fail cleanly instead.
+			STBI_ASSERT(0);
+			STBI_FREE(data);
+			STBI_FREE(good);
+			return stbi__errpuc("unsupported", "Unsupported format conversion");
+		}
+#undef STBI__CASE
+	}
+	STBI_FREE(data);
+	return good;
+}

+#endif

+#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)

+// nothing

+#else

+// Integer luma of a 16-bit RGB triple: weights 77, 150 and 29 sum to 256,
+// hence the final shift by 8.
+static stbi__uint16 stbi__compute_y_16(int r, int g, int b) {
+	int weighted = (r * 77) + (g * 150) + (29 * b);
+	return (stbi__uint16)(weighted >> 8);
+}

+#endif

+#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)

+// nothing

+#else

+// Convert `data` (x * y pixels, img_n 16-bit channels) into a newly allocated
+// buffer with req_comp channels. When req_comp == img_n, `data` itself is
+// returned. Otherwise `data` is always freed: on success the new buffer is
+// returned, on allocation failure or an unsupported channel pair the
+// stbi__errpuc failure value (cast to stbi__uint16*) is returned.
+static stbi__uint16* stbi__convert_format16(stbi__uint16* data, int img_n, int req_comp, unsigned int x, unsigned int y) {
+	int i, j;
+	stbi__uint16* good;
+	if (req_comp == img_n)
+		return data;
+	STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
+	// Size is req_comp * 2 bytes per pixel times x * y pixels. Use the
+	// multiply-add allocator, as the 8-bit converter does, rather than an
+	// unchecked product; req_comp * 2 is at most 8.
+	good = (stbi__uint16*)stbi__malloc_mad3(req_comp * 2, x, y, 0);
+	if (good == NULL) {
+		STBI_FREE(data);
+		return (stbi__uint16*)stbi__errpuc("outofmem", "Out of memory");
+	}
+	for (j = 0; j < (int)y; ++j) {
+		stbi__uint16* src = data + j * x * img_n;
+		stbi__uint16* dest = good + j * x * req_comp;
+#define STBI__COMBO(a, b) ((a)*8 + (b))
+#define STBI__CASE(a, b) \
+	case STBI__COMBO(a, b): \
+		for (i = x - 1; i >= 0; --i, src += a, dest += b)
+		// convert source image with img_n components to one with req_comp
+		// components; avoid switch per pixel, so use switch per scanline and
+		// massive macros
+		switch (STBI__COMBO(img_n, req_comp)) {
+			STBI__CASE(1, 2) {
+				dest[0] = src[0];
+				dest[1] = 0xffff;
+			}
+			break;
+			STBI__CASE(1, 3) { dest[0] = dest[1] = dest[2] = src[0]; }
+			break;
+			STBI__CASE(1, 4) {
+				dest[0] = dest[1] = dest[2] = src[0];
+				dest[3] = 0xffff;
+			}
+			break;
+			STBI__CASE(2, 1) { dest[0] = src[0]; }
+			break;
+			STBI__CASE(2, 3) { dest[0] = dest[1] = dest[2] = src[0]; }
+			break;
+			STBI__CASE(2, 4) {
+				dest[0] = dest[1] = dest[2] = src[0];
+				dest[3] = src[1];
+			}
+			break;
+			STBI__CASE(3, 4) {
+				dest[0] = src[0];
+				dest[1] = src[1];
+				dest[2] = src[2];
+				dest[3] = 0xffff;
+			}
+			break;
+			STBI__CASE(3, 1) { dest[0] = stbi__compute_y_16(src[0], src[1], src[2]); }
+			break;
+			STBI__CASE(3, 2) {
+				dest[0] = stbi__compute_y_16(src[0], src[1], src[2]);
+				dest[1] = 0xffff;
+			}
+			break;
+			STBI__CASE(4, 1) { dest[0] = stbi__compute_y_16(src[0], src[1], src[2]); }
+			break;
+			STBI__CASE(4, 2) {
+				dest[0] = stbi__compute_y_16(src[0], src[1], src[2]);
+				dest[1] = src[3];
+			}
+			break;
+			STBI__CASE(4, 3) {
+				dest[0] = src[0];
+				dest[1] = src[1];
+				dest[2] = src[2];
+			}
+			break;
+		default:
+			// Unsupported channel pair. With asserts compiled out the loop used to
+			// fall through and hand back uninitialized pixels; fail cleanly instead.
+			STBI_ASSERT(0);
+			STBI_FREE(data);
+			STBI_FREE(good);
+			return (stbi__uint16*)stbi__errpuc("unsupported", "Unsupported format conversion");
+		}
+#undef STBI__CASE
+	}
+	STBI_FREE(data);
+	return good;
+}

+#endif

+#ifndef STBI_NO_LINEAR

+// Convert an 8-bit image (x * y pixels, `comp` channels) to floats in a new
+// buffer and free `data`. Colour channels go through the l2h gamma and scale;
+// when `comp` is even the last channel is only divided by 255. Returns NULL
+// for NULL input and the stbi__errpf failure value if allocation fails.
+static float* stbi__ldr_to_hdr(stbi_uc* data, int x, int y, int comp) {
+	int px, ch, color_channels;
+	int pixel_count;
+	float* output;
+	if (!data)
+		return NULL;
+	output = (float*)stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
+	if (output == NULL) {
+		STBI_FREE(data);
+		return stbi__errpf("outofmem", "Out of memory");
+	}
+	// compute number of non-alpha components
+	color_channels = (comp & 1) ? comp : comp - 1;
+	pixel_count = x * y;
+	for (px = 0; px < pixel_count; ++px) {
+		const stbi_uc* in = data + px * comp;
+		float* out = output + px * comp;
+		for (ch = 0; ch < color_channels; ++ch)
+			out[ch] = (float)(pow(in[ch] / 255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
+		if (color_channels < comp)
+			out[color_channels] = in[color_channels] / 255.0f;
+	}
+	STBI_FREE(data);
+	return output;
+}

+#endif

+#ifndef STBI_NO_HDR

+#define stbi__float2int(x) ((int)(x))

+// Convert a float image (x * y pixels, `comp` channels) to 8 bits in a new
+// buffer and free `data`. Colour channels go through the inverse h2l scale
+// and gamma; when `comp` is even the last channel is only scaled by 255.
+// Values are rounded and clamped to 0..255. Returns NULL for NULL input and
+// the stbi__errpuc failure value if allocation fails.
+static stbi_uc* stbi__hdr_to_ldr(float* data, int x, int y, int comp) {
+	int px, ch, color_channels;
+	stbi_uc* output;
+	if (!data)
+		return NULL;
+	output = (stbi_uc*)stbi__malloc_mad3(x, y, comp, 0);
+	if (output == NULL) {
+		STBI_FREE(data);
+		return stbi__errpuc("outofmem", "Out of memory");
+	}
+	// compute number of non-alpha components
+	color_channels = (comp & 1) ? comp : comp - 1;
+	for (px = 0; px < x * y; ++px) {
+		const float* in = data + px * comp;
+		stbi_uc* out = output + px * comp;
+		for (ch = 0; ch < color_channels; ++ch) {
+			float v = (float)pow(in[ch] * stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
+			if (v < 0)
+				v = 0;
+			if (v > 255)
+				v = 255;
+			out[ch] = (stbi_uc)stbi__float2int(v);
+		}
+		// ch now indexes the channel after the colour ones, if there is one
+		if (ch < comp) {
+			float v = in[ch] * 255 + 0.5f;
+			if (v < 0)
+				v = 0;
+			if (v > 255)
+				v = 255;
+			out[ch] = (stbi_uc)stbi__float2int(v);
+		}
+	}
+	STBI_FREE(data);
+	return output;
+}

+#endif

+//////////////////////////////////////////////////////////////////////////////

+//

+//  "baseline" JPEG/JFIF decoder

+//

+//    simple implementation

+//      - doesn't support delayed output of y-dimension

+//      - simple interface (only one output format: 8-bit interleaved RGB)

+//      - doesn't try to recover corrupt jpegs

+//      - doesn't allow partial loading, loading multiple at once

+//      - still fast on x86 (copying globals into locals doesn't help x86)

+//      - allocates lots of intermediate memory (full size of all components)

+//        - non-interleaved case requires this anyway

+//        - allows good upsampling (see next)

+//    high-quality

+//      - upsampled channels are bilinearly interpolated, even across blocks

+//      - quality integer IDCT derived from IJG's 'slow'

+//    performance

+//      - fast huffman; reasonable integer IDCT

+//      - some SIMD kernels for common paths on targets with SSE2/NEON

+//      - uses a lot of intermediate memory, could cache poorly

+#ifndef STBI_NO_JPEG

+// huffman decoding acceleration

+#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache

+typedef struct { // one JPEG Huffman table plus lookup data filled in by stbi__build_huffman

+	stbi_uc fast[1 << FAST_BITS]; // indexed by the next FAST_BITS bits: symbol index, or 255 when not accelerated

+	// weirdly, repacking this into AoS is a 10% speed loss, instead of a win

+	stbi__uint16 code[256]; // Huffman code assigned to each symbol index

+	stbi_uc values[256]; // decoded value returned for each symbol index

+	stbi_uc size[257]; // code length in bits per symbol index, terminated by a 0 entry

+	unsigned int maxcode[18]; // per code length: largest code + 1, preshifted to 16 bits; the entry after length 16 is 0xffffffff

+	int delta[17]; // old 'firstsymbol' - old 'firstcode'

+} stbi__huffman;

+typedef struct { // state of one JPEG decode

+	stbi__context* s; // input the entropy decoder pulls bytes from

+	stbi__huffman huff_dc[4];

+	stbi__huffman huff_ac[4];

+	stbi__uint16 dequant[4][64];

+	stbi__int16 fast_ac[4][1 << FAST_BITS];

+	// sizes for components, interleaved MCUs

+	int img_h_max, img_v_max;

+	int img_mcu_x, img_mcu_y;

+	int img_mcu_w, img_mcu_h;

+	// definition of jpeg image component

+	struct {

+		int id;

+		int h, v;

+		int tq;

+		int hd, ha;

+		int dc_pred; // running DC value; each decoded block adds its difference to it

+		int x, y, w2, h2;

+		stbi_uc* data;

+		void *raw_data, *raw_coeff;

+		stbi_uc* linebuf;

+		short* coeff;		  // progressive only

+		int coeff_w, coeff_h; // number of 8x8 coefficient blocks

+	} img_comp[4];

+	stbi__uint32 code_buffer; // jpeg entropy-coded buffer

+	int code_bits;			  // number of valid bits

+	unsigned char marker;	 // marker seen while filling entropy buffer

+	int nomore;				  // flag if we saw a marker so must stop

+	int progressive;

+	int spec_start; // first zigzag index of the current scan; must be non-zero for an AC scan

+	int spec_end; // last zigzag index of the current scan; must be 0 for a DC scan

+	int succ_high; // 0 on the first pass over a coefficient, non-zero on refinement passes

+	int succ_low; // bit position the decoded values are shifted to

+	int eob_run; // blocks still to be skipped by the current end-of-band run

+	int jfif;

+	int app14_color_transform; // Adobe APP14 tag

+	int rgb;

+	int scan_n, order[4];

+	int restart_interval, todo;

+	// kernels

+	void (*idct_block_kernel)(stbi_uc* out, int out_stride, short data[64]);

+	void (*YCbCr_to_RGB_kernel)(stbi_uc* out, const stbi_uc* y, const stbi_uc* pcb, const stbi_uc* pcr, int count, int step);

+	stbi_uc* (*resample_row_hv_2_kernel)(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs);

+} stbi__jpeg;

+// Fill the decoding tables of `h` from `count`, where count[i] is the number
+// of codes that are i+1 bits long (i = 0..15). Returns 1 on success, or the
+// stbi__err failure value when the lengths are inconsistent or describe more
+// symbols than the tables can hold.
+static int stbi__build_huffman(stbi__huffman* h, int* count) {
+	int i, j, k = 0;
+	unsigned int code;
+	// build size list for each symbol (from JPEG spec)
+	for (i = 0; i < 16; ++i)
+		for (j = 0; j < count[i]; ++j) {
+			// code[] and values[] hold 256 entries and size[] needs one extra slot
+			// for its terminator, so a table claiming more than 256 symbols is
+			// corrupt and would overrun them
+			if (k >= 256)
+				return stbi__err("bad size list", "Corrupt JPEG");
+			h->size[k++] = (stbi_uc)(i + 1);
+		}
+	h->size[k] = 0;
+	// compute actual symbols (from jpeg spec)
+	code = 0;
+	k = 0;
+	for (j = 1; j <= 16; ++j) {
+		// compute delta to add to code to compute symbol id
+		h->delta[j] = k - code;
+		if (h->size[k] == j) {
+			while (h->size[k] == j)
+				h->code[k++] = (stbi__uint16)(code++);
+			if (code - 1 >= (1u << j))
+				return stbi__err("bad code lengths", "Corrupt JPEG");
+		}
+		// compute largest code + 1 for this size, preshifted as needed later
+		h->maxcode[j] = code << (16 - j);
+		code <<= 1;
+	}
+	// sentinel (j is 17 here) so the length search always terminates
+	h->maxcode[j] = 0xffffffff;
+	// build non-spec acceleration table; 255 is flag for not-accelerated
+	memset(h->fast, 255, 1 << FAST_BITS);
+	for (i = 0; i < k; ++i) {
+		int s = h->size[i];
+		if (s <= FAST_BITS) {
+			int c = h->code[i] << (FAST_BITS - s);
+			int m = 1 << (FAST_BITS - s);
+			for (j = 0; j < m; ++j) {
+				h->fast[c + j] = (stbi_uc)i;
+			}
+		}
+	}
+	return 1;
+}

+// build a table that decodes both magnitude and value of small ACs in

+// one go.

+// Fill `fast_ac` (1 << FAST_BITS entries) from Huffman table `h`. An entry is
+// non-zero when the Huffman code and its magnitude bits both fit in FAST_BITS
+// bits and the value fits in -128..127; it then packs value * 256 + run * 16 +
+// total bit length. All other entries are 0.
+static void stbi__build_fast_ac(stbi__int16* fast_ac, stbi__huffman* h) {
+	int idx;
+	for (idx = 0; idx < (1 << FAST_BITS); ++idx) {
+		stbi_uc sym = h->fast[idx];
+		int rs, run, magbits, len, k, m;
+		fast_ac[idx] = 0;
+		if (sym >= 255)
+			continue; // not an accelerated code
+		rs = h->values[sym];
+		run = (rs >> 4) & 15;
+		magbits = rs & 15;
+		len = h->size[sym];
+		if (!magbits || len + magbits > FAST_BITS)
+			continue;
+		// magnitude code followed by receive_extend code
+		k = ((idx << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
+		m = 1 << (magbits - 1);
+		if (k < m)
+			k += (~0U << magbits) + 1;
+		// if the result is small enough, we can fit it in fast_ac table
+		if (k >= -128 && k <= 127)
+			fast_ac[idx] = (stbi__int16)((k * 256) + (run * 16) + (len + magbits));
+	}
+}

+// Top up the entropy bit buffer a byte at a time until it holds more than 24
+// bits. An 0xff byte followed by a non-zero byte is a marker: it is recorded,
+// `nomore` is set and filling stops; after that zero bytes are fed in.
+static void stbi__grow_buffer_unsafe(stbi__jpeg* j) {
+	do {
+		unsigned int byte = j->nomore ? 0 : stbi__get8(j->s);
+		if (byte == 0xff) {
+			int next;
+			do {
+				next = stbi__get8(j->s); // consume fill bytes
+			} while (next == 0xff);
+			if (next != 0) {
+				j->marker = (unsigned char)next;
+				j->nomore = 1;
+				return;
+			}
+		}
+		j->code_buffer |= byte << (24 - j->code_bits);
+		j->code_bits += 8;
+	} while (j->code_bits <= 24);
+}

+// (1 << n) - 1

+static const stbi__uint32 stbi__bmask[17] = {0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767, 65535}; // stbi__bmask[n] has the low n bits set, for n = 0..16

+// decode a jpeg huffman value from the bitstream

+// decode a jpeg huffman value from the bitstream
+// Returns the decoded value (0..255) and consumes its code bits, or -1 when
+// no valid code is found or the buffer holds too few bits for it.
+stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg* j, stbi__huffman* h) {
+	unsigned int temp;
+	int c, k;
+	if (j->code_bits < 16)
+		stbi__grow_buffer_unsafe(j);
+	// look at the top FAST_BITS and determine what symbol ID it is,
+	// if the code is <= FAST_BITS
+	c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
+	k = h->fast[c];
+	if (k < 255) {
+		int s = h->size[k];
+		if (s > j->code_bits)
+			return -1;
+		j->code_buffer <<= s;
+		j->code_bits -= s;
+		return h->values[k];
+	}
+	// naive test is to shift the code_buffer down so k bits are
+	// valid, then test against maxcode. To speed this up, we've
+	// preshifted maxcode left so that it has (16-k) 0s at the
+	// end; in other words, regardless of the number of bits, it
+	// wants to be compared against something shifted to have 16;
+	// that way we don't need to shift inside the loop.
+	temp = j->code_buffer >> 16;
+	for (k = FAST_BITS + 1;; ++k)
+		if (temp < h->maxcode[k])
+			break;
+	if (k == 17) {
+		// error! code not found
+		j->code_bits -= 16;
+		return -1;
+	}
+	if (k > j->code_bits)
+		return -1;
+	// convert the huffman code to the symbol id
+	c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
+	// a corrupt table can yield an id outside the 256-entry symbol arrays;
+	// reject it before it is used as an index
+	if (c < 0 || c >= 256)
+		return -1;
+	STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
+	// convert the id to a symbol
+	j->code_bits -= k;
+	j->code_buffer <<= k;
+	return h->values[c];
+}

+// bias[n] = (-1<<n) + 1

+static const int stbi__jbias[16] = {0, -1, -3, -7, -15, -31, -63, -127, -255, -511, -1023, -2047, -4095, -8191, -16383, -32767}; // indexed by bit count n = 0..15; added by stbi__extend_receive when the leading bit is 0

+// combined JPEG 'receive' and JPEG 'extend', since baseline

+// always extends everything it receives.

+stbi_inline static int stbi__extend_receive(stbi__jpeg* j, int n) { // read n bits and sign-extend them: a leading 0 bit gets stbi__jbias[n] added

+	unsigned int k;

+	int sgn;

+	if (j->code_bits < n)

+		stbi__grow_buffer_unsafe(j);

+	sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB; NOTE(review): relies on >> of a negative value being an arithmetic shift (all ones when the MSB is set)

+	k = stbi_lrot(j->code_buffer, n); // rotate so the n wanted bits land in the low bits

+	STBI_ASSERT(n >= 0 && n < (int)(sizeof(stbi__bmask) / sizeof(*stbi__bmask)));

+	j->code_buffer = k & ~stbi__bmask[n]; // keep the remaining bits, clear the consumed ones

+	k &= stbi__bmask[n]; // the n bits just read, as an unsigned value

+	j->code_bits -= n;

+	return k + (stbi__jbias[n] & ~sgn); // bias applied only when the leading bit was 0 (~sgn is then all ones)

+}

+// get some unsigned bits

+// get some unsigned bits
+// Consumes the next n bits of the entropy buffer and returns them as an
+// unsigned value, topping the buffer up first when it holds fewer than n bits.
+stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg* j, int n) {
+	unsigned int rotated;
+	if (j->code_bits < n)
+		stbi__grow_buffer_unsafe(j);
+	// rotate the wanted bits down into the low end, then split them off
+	rotated = stbi_lrot(j->code_buffer, n);
+	j->code_buffer = rotated & ~stbi__bmask[n];
+	j->code_bits -= n;
+	rotated &= stbi__bmask[n];
+	return rotated;
+}

+// Consume one bit from the entropy buffer. The result is non-zero (the bit
+// stays in the 0x80000000 position) when the bit was set, 0 otherwise.
+stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg* j) {
+	unsigned int before;
+	if (j->code_bits < 1)
+		stbi__grow_buffer_unsafe(j);
+	before = j->code_buffer;
+	--j->code_bits;
+	j->code_buffer <<= 1;
+	return before & 0x80000000;
+}

+// given a value that's at position X in the zigzag stream,

+// where does it appear in the 8x8 matrix coded as row-major?

+static const stbi_uc stbi__jpeg_dezigzag[64 + 15] = {0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, // entry k is the row-major 8x8 index of zigzag position k

+													 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46,

+													 53, 60, 61, 54, 47, 55, 62, 63,

+													 // let corrupt input sample past end

+													 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63}; // 15 padding entries, all mapping to the last coefficient

+// decode one 64-entry block--

+// decode one 64-entry block--
+// Decodes the DC difference and the AC coefficients of one block for
+// component `b` into `data` (row-major, already multiplied by `dequant`),
+// using DC table `hdc`, AC table `hac` and the fast-AC table `fac`.
+// Returns 1 on success or the stbi__err failure value on a bad Huffman code.
+static int stbi__jpeg_decode_block(stbi__jpeg* j, short data[64], stbi__huffman* hdc, stbi__huffman* hac, stbi__int16* fac, int b, stbi__uint16* dequant) {
+	int diff, dc, k;
+	int t;
+	if (j->code_bits < 16)
+		stbi__grow_buffer_unsafe(j);
+	t = stbi__jpeg_huff_decode(j, hdc);
+	// t is a bit count handed to stbi__extend_receive, which indexes the
+	// 16-entry stbi__jbias table with it; a corrupt table can yield up to 255
+	if (t < 0 || t > 15)
+		return stbi__err("bad huffman code", "Corrupt JPEG");
+	// 0 all the ac values now so we can do it 32-bits at a time
+	memset(data, 0, 64 * sizeof(data[0]));
+	diff = t ? stbi__extend_receive(j, t) : 0;
+	dc = j->img_comp[b].dc_pred + diff;
+	j->img_comp[b].dc_pred = dc;
+	data[0] = (short)(dc * dequant[0]);
+	// decode AC components, see JPEG spec
+	k = 1;
+	do {
+		unsigned int zig;
+		int c, r, s;
+		if (j->code_bits < 16)
+			stbi__grow_buffer_unsafe(j);
+		c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
+		r = fac[c];
+		if (r) {				// fast-AC path
+			k += (r >> 4) & 15; // run
+			s = r & 15;			// combined length
+			j->code_buffer <<= s;
+			j->code_bits -= s;
+			// decode into unzigzag'd location
+			zig = stbi__jpeg_dezigzag[k++];
+			data[zig] = (short)((r >> 8) * dequant[zig]);
+		} else {
+			int rs = stbi__jpeg_huff_decode(j, hac);
+			if (rs < 0)
+				return stbi__err("bad huffman code", "Corrupt JPEG");
+			s = rs & 15;
+			r = rs >> 4;
+			if (s == 0) {
+				if (rs != 0xf0)
+					break; // end block
+				k += 16;
+			} else {
+				k += r;
+				// decode into unzigzag'd location
+				zig = stbi__jpeg_dezigzag[k++];
+				data[zig] = (short)(stbi__extend_receive(j, s) * dequant[zig]);
+			}
+		}
+	} while (k < 64);
+	return 1;
+}

+// Decode the DC coefficient of one block of component `b` in a progressive
+// scan. On the first pass (succ_high == 0) the block is zeroed and data[0] is
+// set from the decoded difference shifted by succ_low; on a refinement pass
+// one more bit is added to data[0]. Returns 1 on success or the stbi__err
+// failure value for a corrupt stream.
+static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg* j, short data[64], stbi__huffman* hdc, int b) {
+	int diff, dc;
+	int t;
+	if (j->spec_end != 0)
+		return stbi__err("can't merge dc and ac", "Corrupt JPEG");
+	if (j->code_bits < 16)
+		stbi__grow_buffer_unsafe(j);
+	if (j->succ_high == 0) {
+		// first scan for DC coefficient, must be first
+		memset(data, 0, 64 * sizeof(data[0])); // 0 all the ac values now
+		t = stbi__jpeg_huff_decode(j, hdc);
+		// a failed decode returns -1 and a corrupt table can yield up to 255;
+		// either would index outside the 16-entry stbi__jbias table
+		if (t < 0 || t > 15)
+			return stbi__err("bad huffman code", "Corrupt JPEG");
+		diff = t ? stbi__extend_receive(j, t) : 0;
+		dc = j->img_comp[b].dc_pred + diff;
+		j->img_comp[b].dc_pred = dc;
+		data[0] = (short)(dc << j->succ_low);
+	} else {
+		// refinement scan for DC coefficient
+		if (stbi__jpeg_get_bit(j))
+			data[0] += (short)(1 << j->succ_low);
+	}
+	return 1;
+}

+// @OPTIMIZE: store non-zigzagged during the decode passes,

+// and only de-zigzag when dequantizing

+// Decode the AC coefficients of one block during a progressive AC scan.

+//

+//   j    - decoder state; reads spec_start, spec_end, succ_high, succ_low,

+//          and reads/updates eob_run and the bit buffer

+//   data - the block's 64 coefficients, stored de-zigzagged

+//   hac  - Huffman table for the run/size symbols

+//   fac  - fast lookup table indexed by the top FAST_BITS bits of the bit

+//          buffer; a non-zero entry packs the value (bits 8 and up), the

+//          zero run (bits 4..7) and the combined code length (bits 0..3)

+//

+// succ_high == 0 selects the first scan of the band spec_start..spec_end;

+// otherwise this is a refinement scan that adds the bit 1 << succ_low.

+// Returns 1 on success, or the value of stbi__err() when the stream is corrupt.

+static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg* j, short data[64], stbi__huffman* hac, stbi__int16* fac) {

+	int k;

+	if (j->spec_start == 0)

+		return stbi__err("can't merge dc and ac", "Corrupt JPEG");

+	if (j->succ_high == 0) {

+		int shift = j->succ_low;

+		if (j->eob_run) {

+			// a pending run covers this block: nothing is coded for it here

+			--j->eob_run;

+			return 1;

+		}

+		k = j->spec_start;

+		do {

+			unsigned int zig;

+			int c, r, s;

+			if (j->code_bits < 16)

+				stbi__grow_buffer_unsafe(j);

+			c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);

+			r = fac[c];

+			if (r) {				// fast-AC path

+				k += (r >> 4) & 15; // run

+				s = r & 15;			// combined length

+				// never consume more bits than the buffer really holds,

+				// otherwise code_bits would go negative

+				if (s > j->code_bits)

+					return stbi__err("bad huffman code", "Corrupt JPEG");

+				j->code_buffer <<= s;

+				j->code_bits -= s;

+				zig = stbi__jpeg_dezigzag[k++];

+				// multiply instead of shifting: the value may be negative

+				// and left-shifting a negative value is undefined behaviour

+				data[zig] = (short)((r >> 8) * (1 << shift));

+			} else {

+				int rs = stbi__jpeg_huff_decode(j, hac);

+				if (rs < 0)

+					return stbi__err("bad huffman code", "Corrupt JPEG");

+				s = rs & 15;

+				r = rs >> 4;

+				if (s == 0) {

+					if (r < 15) {

+						// run of 2^r (+ r extra bits) blocks ending here;

+						// this block is the first of them

+						j->eob_run = (1 << r);

+						if (r)

+							j->eob_run += stbi__jpeg_get_bits(j, r);

+						--j->eob_run;

+						break;

+					}

+					k += 16;

+				} else {

+					k += r;

+					zig = stbi__jpeg_dezigzag[k++];

+					// multiply instead of shifting, see fast path above

+					data[zig] = (short)(stbi__extend_receive(j, s) * (1 << shift));

+				}

+			}

+		} while (k <= j->spec_end);

+	} else {

+		// refinement scan for these AC coefficients

+		short bit = (short)(1 << j->succ_low);

+		if (j->eob_run) {

+			--j->eob_run;

+			// no new coefficients: only already non-zero ones read a

+			// correction bit

+			for (k = j->spec_start; k <= j->spec_end; ++k) {

+				short* p = &data[stbi__jpeg_dezigzag[k]];

+				if (*p != 0) {

+					if (stbi__jpeg_get_bit(j)) {

+						if ((*p & bit) == 0) {

+							if (*p > 0)

+								*p += bit;

+							else

+								*p -= bit;

+						}

+					}

+				}

+			}

+		} else {

+			k = j->spec_start;

+			do {

+				int r, s;

+				int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here,

+														 // advance-by-r is so slow, eh

+				if (rs < 0)

+					return stbi__err("bad huffman code", "Corrupt JPEG");

+				s = rs & 15;

+				r = rs >> 4;

+				if (s == 0) {

+					if (r < 15) {

+						j->eob_run = (1 << r) - 1;

+						if (r)

+							j->eob_run += stbi__jpeg_get_bits(j, r);

+						r = 64; // force end of block

+					} else {

+						// r=15 s=0 should write 16 0s, so we just do

+						// a run of 15 0s and then write s (which is 0),

+						// so we don't have to do anything special here

+					}

+				} else {

+					if (s != 1)

+						return stbi__err("bad huffman code", "Corrupt JPEG");

+					// sign bit

+					if (stbi__jpeg_get_bit(j))

+						s = bit;

+					else

+						s = -bit;

+				}

+				// advance by r zero-valued coefficients, refining every

+				// non-zero one that is passed on the way

+				while (k <= j->spec_end) {

+					short* p = &data[stbi__jpeg_dezigzag[k++]];

+					if (*p != 0) {

+						if (stbi__jpeg_get_bit(j)) {

+							if ((*p & bit) == 0) {

+								if (*p > 0)

+									*p += bit;

+								else

+									*p -= bit;

+							}

+						}

+					} else {

+						if (r == 0) {

+							*p = (short)s;

+							break;

+						}

+						--r;

+					}

+				}

+			} while (k <= j->spec_end);

+		}

+	}

+	return 1;

+}

+// saturate an int to the 0..255 range of stbi_uc

+stbi_inline static stbi_uc stbi__clamp(int x) {

+	// one unsigned compare accepts everything already inside 0..255

+	if ((unsigned int)x <= 255)

+		return (stbi_uc)x;

+	// out of range: negatives saturate to 0, anything else to 255

+	return (stbi_uc)(x < 0 ? 0 : 255);

+}

+#define stbi__f2f(x) ((int)(((x)*4096 + 0.5))) // float constant -> int scaled by 4096 (1 << 12); 0.5 is added before the int cast

+#define stbi__fsh(x) ((x)*4096) // scale an integer expression by 4096 so it matches values multiplied by stbi__f2f constants

+// derived from jidctint -- DCT_ISLOW. One 1-D IDCT step over s0..s7: declares int locals t0..t3, p1..p5, x0..x3 in the invoking scope; on exit x0..x3 come from the even inputs (s0,s2,s4,s6) and t0..t3 from the odd inputs (s1,s3,s5,s7), all scaled by 4096, left for the caller to combine and shift.

+#define STBI__IDCT_1D(s0, s1, s2, s3, s4, s5, s6, s7)                                                                                                          \

+	int t0, t1, t2, t3, p1, p2, p3, p4, p5, x0, x1, x2, x3;                                                                                                    \

+	p2 = s2;                                                                                                                                                   \

+	p3 = s6;                                                                                                                                                   \

+	p1 = (p2 + p3) * stbi__f2f(0.5411961f);                                                                                                                    \

+	t2 = p1 + p3 * stbi__f2f(-1.847759065f);                                                                                                                   \

+	t3 = p1 + p2 * stbi__f2f(0.765366865f);                                                                                                                    \

+	p2 = s0;                                                                                                                                                   \

+	p3 = s4;                                                                                                                                                   \

+	t0 = stbi__fsh(p2 + p3);                                                                                                                                   \

+	t1 = stbi__fsh(p2 - p3);                                                                                                                                   \

+	x0 = t0 + t3;                                                                                                                                              \

+	x3 = t0 - t3;                                                                                                                                              \

+	x1 = t1 + t2;                                                                                                                                              \

+	x2 = t1 - t2;                                                                                                                                              \

+	t0 = s7;                                                                                                                                                   \

+	t1 = s5;                                                                                                                                                   \

+	t2 = s3;                                                                                                                                                   \

+	t3 = s1;                                                                                                                                                   \

+	p3 = t0 + t2;                                                                                                                                              \

+	p4 = t1 + t3;                                                                                                                                              \

+	p1 = t0 + t3;                                                                                                                                              \

+	p2 = t1 + t2;                                                                                                                                              \

+	p5 = (p3 + p4) * stbi__f2f(1.175875602f);                                                                                                                  \

+	t0 = t0 * stbi__f2f(0.298631336f);                                                                                                                         \

+	t1 = t1 * stbi__f2f(2.053119869f);                                                                                                                         \

+	t2 = t2 * stbi__f2f(3.072711026f);                                                                                                                         \

+	t3 = t3 * stbi__f2f(1.501321110f);                                                                                                                         \

+	p1 = p5 + p1 * stbi__f2f(-0.899976223f);                                                                                                                   \

+	p2 = p5 + p2 * stbi__f2f(-2.562915447f);                                                                                                                   \

+	p3 = p3 * stbi__f2f(-1.961570560f);                                                                                                                        \

+	p4 = p4 * stbi__f2f(-0.390180644f);                                                                                                                        \

+	t3 += p1 + p4;                                                                                                                                             \

+	t2 += p2 + p3;                                                                                                                                             \

+	t1 += p2 + p4;                                                                                                                                             \

+	t0 += p1 + p3;

+// Generic integer 8x8 inverse DCT.

+//

+//   out        - receives 8 rows of 8 clamped bytes

+//   out_stride - distance in bytes between consecutive output rows

+//   data       - the 64 input coefficients, read as 8 rows of 8

+//

+// Runs STBI__IDCT_1D down every column into a temporary int block, then

+// along every row of that block, clamping each result to 0..255.

+static void stbi__idct_block(stbi_uc* out, int out_stride, short data[64]) {

+	int val[64];

+	int n;

+	int* v = val;

+	const short* d = data;

+	stbi_uc* o = out;

+	// pass 1: columns of data[] -> columns of val[]

+	for (n = 0; n < 8; ++n) {

+		if (d[8] != 0 || d[16] != 0 || d[24] != 0 || d[32] != 0 || d[40] != 0 || d[48] != 0 || d[56] != 0) {

+			STBI__IDCT_1D(d[0], d[8], d[16], d[24], d[32], d[40], d[48], d[56])

+			// the constants carry a factor of 1<<12; shift back down by 10

+			// (with rounding) so 2 extra bits of precision survive

+			x0 += 512;

+			x1 += 512;

+			x2 += 512;

+			x3 += 512;

+			v[0] = (x0 + t3) >> 10;

+			v[8] = (x1 + t2) >> 10;

+			v[16] = (x2 + t1) >> 10;

+			v[24] = (x3 + t0) >> 10;

+			v[32] = (x3 - t0) >> 10;

+			v[40] = (x2 - t1) >> 10;

+			v[48] = (x1 - t2) >> 10;

+			v[56] = (x0 - t3) >> 10;

+		} else {

+			// every AC term in this column is zero, so the whole column

+			// is just the DC term with the same 2 extra bits of precision

+			int dcterm = d[0] * 4;

+			v[0] = dcterm;

+			v[8] = dcterm;

+			v[16] = dcterm;

+			v[24] = dcterm;

+			v[32] = dcterm;

+			v[40] = dcterm;

+			v[48] = dcterm;

+			v[56] = dcterm;

+		}

+		++d;

+		++v;

+	}

+	// pass 2: rows of val[] -> rows of out

+	v = val;

+	for (n = 0; n < 8; ++n) {

+		// no shortcut here: pass 1 has spread the components out

+		STBI__IDCT_1D(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7])

+		// 1<<12 from the constants, 1<<2 kept from pass 1 and 1<<3 from the

+		// two sqrt(8) scalings make 1<<17 to remove. Rounding adds half of

+		// that (65536); the +128 that maps -128..127 onto 0..255 is folded

+		// in ahead of the shift as 128 << 17.

+		x0 += 65536 + (128 << 17);

+		x1 += 65536 + (128 << 17);

+		x2 += 65536 + (128 << 17);

+		x3 += 65536 + (128 << 17);

+		o[0] = stbi__clamp((x0 + t3) >> 17);

+		o[1] = stbi__clamp((x1 + t2) >> 17);

+		o[2] = stbi__clamp((x2 + t1) >> 17);

+		o[3] = stbi__clamp((x3 + t0) >> 17);

+		o[4] = stbi__clamp((x3 - t0) >> 17);

+		o[5] = stbi__clamp((x2 - t1) >> 17);

+		o[6] = stbi__clamp((x1 - t2) >> 17);

+		o[7] = stbi__clamp((x0 - t3) >> 17);

+		v += 8;

+		o += out_stride;

+	}

+}

+#ifdef STBI_SSE2

+// sse2 integer IDCT. not the fastest possible implementation but it

+// produces bit-identical results to the generic C version so it's

+// fully "transparent". Reads data[] with _mm_load_si128 (the aligned 128-bit load, so data must be 16-byte aligned) and writes 8 rows of 8 bytes to out, out_stride bytes apart.

+static void stbi__idct_simd(stbi_uc* out, int out_stride, short data[64]) {

+	// This is constructed to match our regular (generic) integer IDCT exactly.

+	__m128i row0, row1, row2, row3, row4, row5, row6, row7; // one vector of eight 16-bit values per row of the block

+	__m128i tmp; // scratch used by the dct_interleave8/16 macros

+// dot product constant: even elems=x, odd elems=y

+#define dct_const(x, y) _mm_setr_epi16((x), (y), (x), (y), (x), (y), (x), (y))

+// out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)

+// out(1) = c1[even]*x + c1[odd]*y; each result is declared as a pair out_l / out_h (from the low / high halves of x and y)

+#define dct_rot(out0, out1, x, y, c0, c1)                                                                                                                      \

+	__m128i c0##lo = _mm_unpacklo_epi16((x), (y));                                                                                                             \

+	__m128i c0##hi = _mm_unpackhi_epi16((x), (y));                                                                                                             \

+	__m128i out0##_l = _mm_madd_epi16(c0##lo, c0);                                                                                                             \

+	__m128i out0##_h = _mm_madd_epi16(c0##hi, c0);                                                                                                             \

+	__m128i out1##_l = _mm_madd_epi16(c0##lo, c1);                                                                                                             \

+	__m128i out1##_h = _mm_madd_epi16(c0##hi, c1)

+// out = in << 12  (in 16-bit, out 32-bit); done by placing in in the upper 16 bits of each 32-bit lane and arithmetic-shifting right by 4

+#define dct_widen(out, in)                                                                                                                                     \

+	__m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4);                                                                        \

+	__m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)

+// wide add (declares out_l / out_h)

+#define dct_wadd(out, a, b)                                                                                                                                    \

+	__m128i out##_l = _mm_add_epi32(a##_l, b##_l);                                                                                                             \

+	__m128i out##_h = _mm_add_epi32(a##_h, b##_h)

+// wide sub (declares out_l / out_h)

+#define dct_wsub(out, a, b)                                                                                                                                    \

+	__m128i out##_l = _mm_sub_epi32(a##_l, b##_l);                                                                                                             \

+	__m128i out##_h = _mm_sub_epi32(a##_h, b##_h)

+// butterfly a/b, add bias, then shift by "s" and pack: out0 = (a + bias + b) >> s, out1 = (a + bias - b) >> s, packed to 16-bit with signed saturation

+#define dct_bfly32o(out0, out1, a, b, bias, s)                                                                                                                 \

+	{                                                                                                                                                          \

+		__m128i abiased_l = _mm_add_epi32(a##_l, bias);                                                                                                        \

+		__m128i abiased_h = _mm_add_epi32(a##_h, bias);                                                                                                        \

+		dct_wadd(sum, abiased, b);                                                                                                                             \

+		dct_wsub(dif, abiased, b);                                                                                                                             \

+		out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s));                                                                            \

+		out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s));                                                                            \

+	}

+// 8-bit interleave step (for transposes); overwrites both arguments and tmp

+#define dct_interleave8(a, b)                                                                                                                                  \

+	tmp = a;                                                                                                                                                   \

+	a = _mm_unpacklo_epi8(a, b);                                                                                                                               \

+	b = _mm_unpackhi_epi8(tmp, b)

+// 16-bit interleave step (for transposes); overwrites both arguments and tmp

+#define dct_interleave16(a, b)                                                                                                                                 \

+	tmp = a;                                                                                                                                                   \

+	a = _mm_unpacklo_epi16(a, b);                                                                                                                              \

+	b = _mm_unpackhi_epi16(tmp, b)

+#define dct_pass(bias, shift)                                                                                                                                  \

+	{                                                                                                                                                          \

+		/* even part */                                                                                                                                        \

+		dct_rot(t2e, t3e, row2, row6, rot0_0, rot0_1);                                                                                                         \

+		__m128i sum04 = _mm_add_epi16(row0, row4);                                                                                                             \

+		__m128i dif04 = _mm_sub_epi16(row0, row4);                                                                                                             \

+		dct_widen(t0e, sum04);                                                                                                                                 \

+		dct_widen(t1e, dif04);                                                                                                                                 \

+		dct_wadd(x0, t0e, t3e);                                                                                                                                \

+		dct_wsub(x3, t0e, t3e);                                                                                                                                \

+		dct_wadd(x1, t1e, t2e);                                                                                                                                \

+		dct_wsub(x2, t1e, t2e);                                                                                                                                \

+		/* odd part */                                                                                                                                         \

+		dct_rot(y0o, y2o, row7, row3, rot2_0, rot2_1);                                                                                                         \

+		dct_rot(y1o, y3o, row5, row1, rot3_0, rot3_1);                                                                                                         \

+		__m128i sum17 = _mm_add_epi16(row1, row7);                                                                                                             \

+		__m128i sum35 = _mm_add_epi16(row3, row5);                                                                                                             \

+		dct_rot(y4o, y5o, sum17, sum35, rot1_0, rot1_1);                                                                                                       \

+		dct_wadd(x4, y0o, y4o);                                                                                                                                \

+		dct_wadd(x5, y1o, y5o);                                                                                                                                \

+		dct_wadd(x6, y2o, y5o);                                                                                                                                \

+		dct_wadd(x7, y3o, y4o);                                                                                                                                \

+		dct_bfly32o(row0, row7, x0, x7, bias, shift);                                                                                                          \

+		dct_bfly32o(row1, row6, x1, x6, bias, shift);                                                                                                          \

+		dct_bfly32o(row2, row5, x2, x5, bias, shift);                                                                                                          \

+		dct_bfly32o(row3, row4, x3, x4, bias, shift);                                                                                                          \

+	}

+	__m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f)); // rot0_*: even part, applied to (row2, row6) in dct_pass

+	__m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f(0.765366865f), stbi__f2f(0.5411961f));

+	__m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f)); // rot1_*: odd part, applied to (row1+row7, row3+row5)

+	__m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));

+	__m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f(0.298631336f), stbi__f2f(-1.961570560f)); // rot2_*: odd part, applied to (row7, row3)

+	__m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f(3.072711026f));

+	__m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f(2.053119869f), stbi__f2f(-0.390180644f)); // rot3_*: odd part, applied to (row5, row1)

+	__m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f(1.501321110f));

+	// rounding biases in column/row passes, see stbi__idct_block for

+	// explanation.

+	__m128i bias_0 = _mm_set1_epi32(512); // column pass: same 512 the C version adds before its >> 10

+	__m128i bias_1 = _mm_set1_epi32(65536 + (128 << 17)); // row pass: same rounding + level-shift term the C version adds before its >> 17

+	// load the eight rows of coefficients (aligned loads)

+	row0 = _mm_load_si128((const __m128i*)(data + 0 * 8));

+	row1 = _mm_load_si128((const __m128i*)(data + 1 * 8));

+	row2 = _mm_load_si128((const __m128i*)(data + 2 * 8));

+	row3 = _mm_load_si128((const __m128i*)(data + 3 * 8));

+	row4 = _mm_load_si128((const __m128i*)(data + 4 * 8));

+	row5 = _mm_load_si128((const __m128i*)(data + 5 * 8));

+	row6 = _mm_load_si128((const __m128i*)(data + 6 * 8));

+	row7 = _mm_load_si128((const __m128i*)(data + 7 * 8));

+	// column pass

+	dct_pass(bias_0, 10);

+	{

+		// 16bit 8x8 transpose pass 1

+		dct_interleave16(row0, row4);

+		dct_interleave16(row1, row5);

+		dct_interleave16(row2, row6);

+		dct_interleave16(row3, row7);

+		// transpose pass 2

+		dct_interleave16(row0, row2);

+		dct_interleave16(row1, row3);

+		dct_interleave16(row4, row6);

+		dct_interleave16(row5, row7);

+		// transpose pass 3

+		dct_interleave16(row0, row1);

+		dct_interleave16(row2, row3);

+		dct_interleave16(row4, row5);

+		dct_interleave16(row6, row7);

+	}

+	// row pass

+	dct_pass(bias_1, 17);

+	{

+		// pack 16-bit results to bytes with unsigned saturation (this is the 0..255 clamp)

+		__m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7

+		__m128i p1 = _mm_packus_epi16(row2, row3);

+		__m128i p2 = _mm_packus_epi16(row4, row5);

+		__m128i p3 = _mm_packus_epi16(row6, row7);

+		// 8bit 8x8 transpose pass 1

+		dct_interleave8(p0, p2); // a0e0a1e1...

+		dct_interleave8(p1, p3); // c0g0c1g1...

+		// transpose pass 2

+		dct_interleave8(p0, p1); // a0c0e0g0...

+		dct_interleave8(p2, p3); // b0d0f0h0...

+		// transpose pass 3

+		dct_interleave8(p0, p2); // a0b0c0d0...

+		dct_interleave8(p1, p3); // a4b4c4d4...

+		// store: each vector holds two output rows; the low 8 bytes are written directly, then shuffle 0x4e swaps the 64-bit halves so the other row can be written the same way

+		_mm_storel_epi64((__m128i*)out, p0);

+		out += out_stride;

+		_mm_storel_epi64((__m128i*)out, _mm_shuffle_epi32(p0, 0x4e));

+		out += out_stride;

+		_mm_storel_epi64((__m128i*)out, p2);

+		out += out_stride;

+		_mm_storel_epi64((__m128i*)out, _mm_shuffle_epi32(p2, 0x4e));

+		out += out_stride;

+		_mm_storel_epi64((__m128i*)out, p1);

+		out += out_stride;

+		_mm_storel_epi64((__m128i*)out, _mm_shuffle_epi32(p1, 0x4e));

+		out += out_stride;

+		_mm_storel_epi64((__m128i*)out, p3);

+		out += out_stride;

+		_mm_storel_epi64((__m128i*)out, _mm_shuffle_epi32(p3, 0x4e));

+	}

+#undef dct_const

+#undef dct_rot

+#undef dct_widen

+#undef dct_wadd

+#undef dct_wsub

+#undef dct_bfly32o

+#undef dct_interleave8

+#undef dct_interleave16

+#undef dct_pass

+}

+#endif // STBI_SSE2

+#ifdef STBI_NEON

+// NEON integer IDCT. should produce bit-identical

+// results to the generic C version.

+static void stbi__idct_simd(stbi_uc* out, int out_stride, short data[64]) {

+	int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;

+	int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));

+	int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));

+	int16x4_t rot0_2 = vdup_n_s16(stbi__f2f(0.765366865f));

+	int16x4_t rot1_0 = vdup_n_s16(stbi__f2f(1.175875602f));

+	int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));

+	int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));

+	int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));

+	int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));

+	int16x4_t rot3_0 = vdup_n_s16(stbi__f2f(0.298631336f));

+	int16x4_t rot3_1 = vdup_n_s16(stbi__f2f(2.053119869f));

+	int16x4_t rot3_2 = vdup_n_s16(stbi__f2f(3.072711026f));

+	int16x4_t rot3_3 = vdup_n_s16(stbi__f2f(1.501321110f));

+#define dct_long_mul(out, inq, coeff)                                                                                                                          \

+	int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff);                                                                                                   \

+	int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)

+#define dct_long_mac(out, acc, inq, coeff)                                                                                                                     \

+	int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff);                                                                                          \

+	int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)

+#define dct_widen(out, inq)                                                                                                                                    \

+	int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12);                                                                                                    \

+	int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)

+// wide add

+#define dct_wadd(out, a, b)                                                                                                                                    \

+	int32x4_t out##_l = vaddq_s32(a##_l, b##_l);                                                                                                               \

+	int32x4_t out##_h = vaddq_s32(a##_h, b##_h)

+// wide sub

+#define dct_wsub(out, a, b)                                                                                                                                    \

+	int32x4_t out##_l = vsubq_s32(a##_l, b##_l);                                                                                                               \

+	int32x4_t out##_h = vsubq_s32(a##_h, b##_h)

+// butterfly a/b, then shift using "shiftop" by "s" and pack

+#define dct_bfly32o(out0, out1, a, b, shiftop, s)                                                                                                              \

+	{                                                                                                                                                          \

+		dct_wadd(sum, a, b);                                                                                                                                   \

+		dct_wsub(dif, a, b);                                                                                                                                   \

+		out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s));                                                                                             \

+		out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s));                                                                                             \

+	}

+#define dct_pass(shiftop, shift)                                                                                                                               \

+	{                                                                                                                                                          \

+		/* even part */                                                                                                                                        \

+		int16x8_t sum26 = vaddq_s16(row2, row6);                                                                                                               \

+		dct_long_mul(p1e, sum26, rot0_0);                                                                                                                      \

+		dct_long_mac(t2e, p1e, row6, rot0_1);                                                                                                                  \

+		dct_long_mac(t3e, p1e, row2, rot0_2);                                                                                                                  \

+		int16x8_t sum04 = vaddq_s16(row0, row4);                                                                                                               \

+		int16x8_t dif04 = vsubq_s16(row0, row4);                                                                                                               \

+		dct_widen(t0e, sum04);                                                                                                                                 \

+		dct_widen(t1e, dif04);                                                                                                                                 \

+		dct_wadd(x0, t0e, t3e);                                                                                                                                \

+		dct_wsub(x3, t0e, t3e);                                                                                                                                \

+		dct_wadd(x1, t1e, t2e);                                                                                                                                \

+		dct_wsub(x2, t1e, t2e);                                                                                                                                \

+		/* odd part */                                                                                                                                         \

+		int16x8_t sum15 = vaddq_s16(row1, row5);                                                                                                               \

+		int16x8_t sum17 = vaddq_s16(row1, row7);                                                                                                               \

+		int16x8_t sum35 = vaddq_s16(row3, row5);                                                                                                               \

+		int16x8_t sum37 = vaddq_s16(row3, row7);                                                                                                               \

+		int16x8_t sumodd = vaddq_s16(sum17, sum35);                                                                                                            \

+		dct_long_mul(p5o, sumodd, rot1_0);                                                                                                                     \

+		dct_long_mac(p1o, p5o, sum17, rot1_1);                                                                                                                 \

+		dct_long_mac(p2o, p5o, sum35, rot1_2);                                                                                                                 \

+		dct_long_mul(p3o, sum37, rot2_0);                                                                                                                      \

+		dct_long_mul(p4o, sum15, rot2_1);                                                                                                                      \

+		dct_wadd(sump13o, p1o, p3o);                                                                                                                           \

+		dct_wadd(sump24o, p2o, p4o);                                                                                                                           \

+		dct_wadd(sump23o, p2o, p3o);                                                                                                                           \

+		dct_wadd(sump14o, p1o, p4o);                                                                                                                           \

+		dct_long_mac(x4, sump13o, row7, rot3_0);                                                                                                               \

+		dct_long_mac(x5, sump24o, row5, rot3_1);                                                                                                               \

+		dct_long_mac(x6, sump23o, row3, rot3_2);                                                                                                               \

+		dct_long_mac(x7, sump14o, row1, rot3_3);                                                                                                               \

+		dct_bfly32o(row0, row7, x0, x7, shiftop, shift);                                                                                                       \

+		dct_bfly32o(row1, row6, x1, x6, shiftop, shift);                                                                                                       \

+		dct_bfly32o(row2, row5, x2, x5, shiftop, shift);                                                                                                       \

+		dct_bfly32o(row3, row4, x3, x4, shiftop, shift);                                                                                                       \

+	}

+	// load

+	row0 = vld1q_s16(data + 0 * 8);

+	row1 = vld1q_s16(data + 1 * 8);

+	row2 = vld1q_s16(data + 2 * 8);

+	row3 = vld1q_s16(data + 3 * 8);

+	row4 = vld1q_s16(data + 4 * 8);

+	row5 = vld1q_s16(data + 5 * 8);

+	row6 = vld1q_s16(data + 6 * 8);

+	row7 = vld1q_s16(data + 7 * 8);

+	// add DC bias

+	row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));

+	// column pass

+	dct_pass(vrshrn_n_s32, 10);

+	// 16bit 8x8 transpose

+	{

+// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.

+// whether compilers actually get this is another story, sadly.

+#define dct_trn16(x, y)                                                                                                                                        \

+	{                                                                                                                                                          \

+		int16x8x2_t t = vtrnq_s16(x, y);                                                                                                                       \

+		x = t.val[0];                                                                                                                                          \

+		y = t.val[1];                                                                                                                                          \

+	}

+#define dct_trn32(x, y)                                                                                                                                        \

+	{                                                                                                                                                          \

+		int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y));                                                                         \

+		x = vreinterpretq_s16_s32(t.val[0]);                                                                                                                   \

+		y = vreinterpretq_s16_s32(t.val[1]);                                                                                                                   \

+	}

+#define dct_trn64(x, y)                                                                                                                                        \

+	{                                                                                                                                                          \

+		int16x8_t x0 = x;                                                                                                                                      \

+		int16x8_t y0 = y;                                                                                                                                      \

+		x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0));                                                                                                  \

+		y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0));                                                                                                \

+	}

+		// pass 1

+		dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6

+		dct_trn16(row2, row3);

+		dct_trn16(row4, row5);

+		dct_trn16(row6, row7);

+		// pass 2

+		dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4

+		dct_trn32(row1, row3);

+		dct_trn32(row4, row6);

+		dct_trn32(row5, row7);

+		// pass 3

+		dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0

+		dct_trn64(row1, row5);

+		dct_trn64(row2, row6);

+		dct_trn64(row3, row7);

+#undef dct_trn16

+#undef dct_trn32

+#undef dct_trn64

+	}

+	// row pass

+	// vrshrn_n_s32 only supports shifts up to 16, we need

+	// 17. so do a non-rounding shift of 16 first then follow

+	// up with a rounding shift by 1.

+	dct_pass(vshrn_n_s32, 16);

+	{

+		// pack and round

+		uint8x8_t p0 = vqrshrun_n_s16(row0, 1);

+		uint8x8_t p1 = vqrshrun_n_s16(row1, 1);

+		uint8x8_t p2 = vqrshrun_n_s16(row2, 1);

+		uint8x8_t p3 = vqrshrun_n_s16(row3, 1);

+		uint8x8_t p4 = vqrshrun_n_s16(row4, 1);

+		uint8x8_t p5 = vqrshrun_n_s16(row5, 1);

+		uint8x8_t p6 = vqrshrun_n_s16(row6, 1);

+		uint8x8_t p7 = vqrshrun_n_s16(row7, 1);

+		// again, these can translate into one instruction, but often don't.

+#define dct_trn8_8(x, y)                                                                                                                                       \

+	{                                                                                                                                                          \

+		uint8x8x2_t t = vtrn_u8(x, y);                                                                                                                         \

+		x = t.val[0];                                                                                                                                          \

+		y = t.val[1];                                                                                                                                          \

+	}

+#define dct_trn8_16(x, y)                                                                                                                                      \

+	{                                                                                                                                                          \

+		uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y));                                                                             \

+		x = vreinterpret_u8_u16(t.val[0]);                                                                                                                     \

+		y = vreinterpret_u8_u16(t.val[1]);                                                                                                                     \

+	}

+#define dct_trn8_32(x, y)                                                                                                                                      \

+	{                                                                                                                                                          \

+		uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y));                                                                             \

+		x = vreinterpret_u8_u32(t.val[0]);                                                                                                                     \

+		y = vreinterpret_u8_u32(t.val[1]);                                                                                                                     \

+	}

+		// sadly can't use interleaved stores here since we only write

+		// 8 bytes to each scan line!

+		// 8x8 8-bit transpose pass 1

+		dct_trn8_8(p0, p1);

+		dct_trn8_8(p2, p3);

+		dct_trn8_8(p4, p5);

+		dct_trn8_8(p6, p7);

+		// pass 2

+		dct_trn8_16(p0, p2);

+		dct_trn8_16(p1, p3);

+		dct_trn8_16(p4, p6);

+		dct_trn8_16(p5, p7);

+		// pass 3

+		dct_trn8_32(p0, p4);

+		dct_trn8_32(p1, p5);

+		dct_trn8_32(p2, p6);

+		dct_trn8_32(p3, p7);

+		// store

+		vst1_u8(out, p0);

+		out += out_stride;

+		vst1_u8(out, p1);

+		out += out_stride;

+		vst1_u8(out, p2);

+		out += out_stride;

+		vst1_u8(out, p3);

+		out += out_stride;

+		vst1_u8(out, p4);

+		out += out_stride;

+		vst1_u8(out, p5);

+		out += out_stride;

+		vst1_u8(out, p6);

+		out += out_stride;

+		vst1_u8(out, p7);

+#undef dct_trn8_8

+#undef dct_trn8_16

+#undef dct_trn8_32

+	}

+#undef dct_long_mul

+#undef dct_long_mac

+#undef dct_widen

+#undef dct_wadd

+#undef dct_wsub

+#undef dct_bfly32o

+#undef dct_pass

+}

+#endif // STBI_NEON

+#define STBI__MARKER_none 0xff

+// Fetch the next marker byte for decoder `j`.
+// A marker left pending in j->marker is handed back first and the pending
+// slot is cleared. Otherwise one byte is read from the stream: anything other
+// than 0xff yields STBI__MARKER_none (0xff, never a valid marker value);
+// after a 0xff, further 0xff fill bytes are skipped and the first byte that
+// is not 0xff is returned.
+static stbi_uc stbi__get_marker(stbi__jpeg* j) {
+	stbi_uc byte;
+	if (j->marker == STBI__MARKER_none) {
+		byte = stbi__get8(j->s);
+		if (byte != 0xff)
+			return STBI__MARKER_none;
+		do {
+			byte = stbi__get8(j->s); // consume repeated 0xff fill bytes
+		} while (byte == 0xff);
+		return byte;
+	}
+	// hand out the pending marker exactly once
+	byte = j->marker;
+	j->marker = STBI__MARKER_none;
+	return byte;
+}

+// in each scan, we'll have scan_n components, and the order

+// of the components is specified by order[]

+#define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7)

+// Reset the entropy decoder state of `j`: empties the bit buffer, clears the
+// pending marker and the end-of-band run, zeroes the DC prediction of all
+// four component slots and reloads the MCU countdown. Used at the start of a
+// scan and after every restart interval.
+static void stbi__jpeg_reset(stbi__jpeg* j) {
+	int c;
+	j->code_bits = 0;
+	j->code_buffer = 0;
+	j->nomore = 0;
+	for (c = 0; c < 4; ++c)
+		j->img_comp[c].dc_pred = 0;
+	j->marker = STBI__MARKER_none;
+	j->eob_run = 0;
+	// without a restart interval the countdown starts at 0x7fffffff MCUs;
+	// that's plenty safe, since we don't even allow 1<<30 pixels
+	if (j->restart_interval)
+		j->todo = j->restart_interval;
+	else
+		j->todo = 0x7fffffff;
+}

+// Bookkeeping after one finished MCU: counts down the restart interval and,
+// once it runs out, tops up the bit buffer and looks at the pending marker.
+// Returns 1 when the caller must stop decoding the scan (the pending marker
+// is not a restart marker), 0 to keep going; on a restart marker the entropy
+// decoder is reset first.
+static int stbi__jpeg_restart_check(stbi__jpeg* z) {
+	if (--z->todo > 0)
+		return 0;
+	if (z->code_bits < 24)
+		stbi__grow_buffer_unsafe(z);
+	// if it's NOT a restart, then just bail, so we get
+	// corrupt data rather than no data
+	if (!STBI__RESTART(z->marker))
+		return 1;
+	stbi__jpeg_reset(z);
+	return 0;
+}
+// Decode the entropy-coded data of the current scan.
+// Baseline scans are decoded block by block and run through the IDCT kernel
+// straight into the component planes; progressive scans only accumulate
+// coefficients into the per-component coeff buffers. A scan with a single
+// component walks that component's own blocks in scanline order; otherwise
+// the blocks are walked MCU by MCU in the order given by z->order[].
+// Returns 0 if a block fails to decode, 1 otherwise (including when the scan
+// is cut short because an expected restart marker is missing).
+static int stbi__parse_entropy_coded_data(stbi__jpeg* z) {
+	stbi__jpeg_reset(z);
+	if (!z->progressive) {
+		STBI_SIMD_ALIGN(short, data[64]);
+		if (z->scan_n == 1) {
+			// non-interleaved: the number of blocks depends only on how
+			// many actual "pixels" this component has, independent of
+			// interleaved MCU blocking
+			int n = z->order[0];
+			int ha = z->img_comp[n].ha;
+			int blocks_w = (z->img_comp[n].x + 7) >> 3;
+			int blocks_h = (z->img_comp[n].y + 7) >> 3;
+			int bx, by;
+			for (by = 0; by < blocks_h; ++by) {
+				for (bx = 0; bx < blocks_w; ++bx) {
+					if (!stbi__jpeg_decode_block(z, data, z->huff_dc + z->img_comp[n].hd, z->huff_ac + ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq]))
+						return 0;
+					z->idct_block_kernel(z->img_comp[n].data + z->img_comp[n].w2 * by * 8 + bx * 8, z->img_comp[n].w2, data);
+					// every data block is an MCU here
+					if (stbi__jpeg_restart_check(z))
+						return 1;
+				}
+			}
+		} else {
+			// interleaved: each MCU holds h*v blocks of every scan component
+			int mx, my, k, bx, by;
+			for (my = 0; my < z->img_mcu_y; ++my) {
+				for (mx = 0; mx < z->img_mcu_x; ++mx) {
+					for (k = 0; k < z->scan_n; ++k) {
+						int n = z->order[k];
+						for (by = 0; by < z->img_comp[n].v; ++by) {
+							for (bx = 0; bx < z->img_comp[n].h; ++bx) {
+								int px = (mx * z->img_comp[n].h + bx) * 8;
+								int py = (my * z->img_comp[n].v + by) * 8;
+								int ha = z->img_comp[n].ha;
+								if (!stbi__jpeg_decode_block(z, data, z->huff_dc + z->img_comp[n].hd, z->huff_ac + ha, z->fast_ac[ha], n,
+															 z->dequant[z->img_comp[n].tq]))
+									return 0;
+								z->idct_block_kernel(z->img_comp[n].data + z->img_comp[n].w2 * py + px, z->img_comp[n].w2, data);
+							}
+						}
+					}
+					// all components done: that's one interleaved MCU
+					if (stbi__jpeg_restart_check(z))
+						return 1;
+				}
+			}
+		}
+	} else if (z->scan_n == 1) {
+		// progressive, non-interleaved: DC scan when spec_start is 0,
+		// AC scan otherwise
+		int n = z->order[0];
+		int blocks_w = (z->img_comp[n].x + 7) >> 3;
+		int blocks_h = (z->img_comp[n].y + 7) >> 3;
+		int bx, by;
+		for (by = 0; by < blocks_h; ++by) {
+			for (bx = 0; bx < blocks_w; ++bx) {
+				short* block = z->img_comp[n].coeff + 64 * (bx + by * z->img_comp[n].coeff_w);
+				int ok;
+				if (z->spec_start == 0) {
+					ok = stbi__jpeg_decode_block_prog_dc(z, block, &z->huff_dc[z->img_comp[n].hd], n);
+				} else {
+					int ha = z->img_comp[n].ha;
+					ok = stbi__jpeg_decode_block_prog_ac(z, block, &z->huff_ac[ha], z->fast_ac[ha]);
+				}
+				if (!ok)
+					return 0;
+				if (stbi__jpeg_restart_check(z))
+					return 1;
+			}
+		}
+	} else {
+		// progressive, interleaved: only the DC decoder is used here
+		int mx, my, k, bx, by;
+		for (my = 0; my < z->img_mcu_y; ++my) {
+			for (mx = 0; mx < z->img_mcu_x; ++mx) {
+				for (k = 0; k < z->scan_n; ++k) {
+					int n = z->order[k];
+					for (by = 0; by < z->img_comp[n].v; ++by) {
+						for (bx = 0; bx < z->img_comp[n].h; ++bx) {
+							int col = (mx * z->img_comp[n].h + bx);
+							int row = (my * z->img_comp[n].v + by);
+							short* block = z->img_comp[n].coeff + 64 * (col + row * z->img_comp[n].coeff_w);
+							if (!stbi__jpeg_decode_block_prog_dc(z, block, &z->huff_dc[z->img_comp[n].hd], n))
+								return 0;
+						}
+					}
+				}
+				if (stbi__jpeg_restart_check(z))
+					return 1;
+			}
+		}
+	}
+	return 1;
+}

+// Scale the 64 coefficients of one block in place: every entry of `data` is
+// multiplied by the entry of `dequant` at the same index.
+static void stbi__jpeg_dequantize(short* data, stbi__uint16* dequant) {
+	int left = 64;
+	while (left-- > 0) {
+		*data *= *dequant;
+		++data;
+		++dequant;
+	}
+}

+// Final pass for progressive images: every stored coefficient block of every
+// component is dequantized in place and then run through the IDCT kernel into
+// the component's pixel plane. Does nothing for non-progressive images.
+static void stbi__jpeg_finish(stbi__jpeg* z) {
+	int n;
+	if (!z->progressive)
+		return;
+	for (n = 0; n < z->s->img_n; ++n) {
+		int blocks_w = (z->img_comp[n].x + 7) >> 3;
+		int blocks_h = (z->img_comp[n].y + 7) >> 3;
+		int bx, by;
+		for (by = 0; by < blocks_h; ++by) {
+			for (bx = 0; bx < blocks_w; ++bx) {
+				short* block = z->img_comp[n].coeff + 64 * (bx + by * z->img_comp[n].coeff_w);
+				stbi_uc* dst = z->img_comp[n].data + z->img_comp[n].w2 * by * 8 + bx * 8;
+				stbi__jpeg_dequantize(block, z->dequant[z->img_comp[n].tq]);
+				z->idct_block_kernel(dst, z->img_comp[n].w2, block);
+			}
+		}
+	}
+}

+// Process the marker segment introduced by marker `m`.
+// Handles DRI (0xDD), DQT (0xDB), DHT (0xC4), APP0..APP15 (0xE0-0xEF) and
+// COM (0xFE); every other value is rejected. For APP0 the "JFIF" tag sets
+// z->jfif, for APP14 the "Adobe" tag stores the color transform byte in
+// z->app14_color_transform; the rest of an APP/COM segment is skipped.
+// Returns 1 on success and 0 on failure (most failures also record a reason
+// through stbi__err).
+static int stbi__process_marker(stbi__jpeg* z, int m) {
+	int L;
+	switch (m) {
+	case STBI__MARKER_none: // no marker found
+		return stbi__err("expected marker", "Corrupt JPEG");
+	case 0xDD: // DRI - specify restart interval
+		if (stbi__get16be(z->s) != 4)
+			return stbi__err("bad DRI len", "Corrupt JPEG");
+		z->restart_interval = stbi__get16be(z->s);
+		return 1;
+	case 0xDB: // DQT - define quantization table
+		L = stbi__get16be(z->s) - 2;
+		while (L > 0) {
+			int q = stbi__get8(z->s);
+			int p = q >> 4, sixteen = (p != 0); // p: 0 = 8-bit entries, 1 = 16-bit entries
+			int t = q & 15, i;
+			if (p != 0 && p != 1)
+				return stbi__err("bad DQT type", "Corrupt JPEG");
+			if (t > 3)
+				return stbi__err("bad DQT table", "Corrupt JPEG");
+			// entries arrive in zigzag order; store them de-zigzagged
+			for (i = 0; i < 64; ++i)
+				z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
+			L -= (sixteen ? 129 : 65);
+		}
+		return L == 0;
+	case 0xC4: // DHT - define huffman table
+		L = stbi__get16be(z->s) - 2;
+		while (L > 0) {
+			stbi_uc* v;
+			int sizes[16], i, n = 0;
+			int q = stbi__get8(z->s);
+			int tc = q >> 4; // table class: 0 = DC, 1 = AC
+			int th = q & 15; // table slot
+			if (tc > 1 || th > 3)
+				return stbi__err("bad DHT header", "Corrupt JPEG");
+			for (i = 0; i < 16; ++i) {
+				sizes[i] = stbi__get8(z->s);
+				n += sizes[i];
+			}
+			// The 16 count bytes come straight from the file and can add
+			// up to 4080, but a table codes 8-bit symbols, so more than
+			// 256 of them is corrupt. Reject it before the table builder
+			// and the symbol copy below can run past the per-table
+			// storage (assumed to be sized for 256 symbols).
+			if (n > 256)
+				return stbi__err("bad DHT header", "Corrupt JPEG");
+			L -= 17;
+			if (tc == 0) {
+				if (!stbi__build_huffman(z->huff_dc + th, sizes))
+					return 0;
+				v = z->huff_dc[th].values;
+			} else {
+				if (!stbi__build_huffman(z->huff_ac + th, sizes))
+					return 0;
+				v = z->huff_ac[th].values;
+			}
+			for (i = 0; i < n; ++i)
+				v[i] = stbi__get8(z->s);
+			if (tc != 0)
+				stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
+			L -= n;
+		}
+		return L == 0;
+	}
+	// check for comment block or APP blocks
+	if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
+		L = stbi__get16be(z->s);
+		if (L < 2) {
+			if (m == 0xFE)
+				return stbi__err("bad COM len", "Corrupt JPEG");
+			else
+				return stbi__err("bad APP len", "Corrupt JPEG");
+		}
+		L -= 2;
+		if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
+			static const unsigned char tag[5] = {'J', 'F', 'I', 'F', '\0'};
+			int ok = 1;
+			int i;
+			// always consume all five tag bytes, even after a mismatch
+			for (i = 0; i < 5; ++i)
+				if (stbi__get8(z->s) != tag[i])
+					ok = 0;
+			L -= 5;
+			if (ok)
+				z->jfif = 1;
+		} else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
+			static const unsigned char tag[6] = {'A', 'd', 'o', 'b', 'e', '\0'};
+			int ok = 1;
+			int i;
+			for (i = 0; i < 6; ++i)
+				if (stbi__get8(z->s) != tag[i])
+					ok = 0;
+			L -= 6;
+			if (ok) {
+				stbi__get8(z->s);							 // version
+				stbi__get16be(z->s);						 // flags0
+				stbi__get16be(z->s);						 // flags1
+				z->app14_color_transform = stbi__get8(z->s); // color transform
+				L -= 6;
+			}
+		}
+		// skip whatever is left of the segment
+		stbi__skip(z->s, L);
+		return 1;
+	}
+	return stbi__err("unknown marker", "Corrupt JPEG");
+}

+// Parse the scan header that follows an SOS marker.
+// Reads the component count, then for each scan component its id and its
+// DC/AC huffman table selectors (recorded in z->order[] and the matching
+// img_comp entry), and finally the spectral selection and successive
+// approximation fields. Returns 1 on success, 0 on a malformed header.
+static int stbi__process_scan_header(stbi__jpeg* z) {
+	int k, approx;
+	int Ls = stbi__get16be(z->s);
+	z->scan_n = stbi__get8(z->s);
+	if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int)z->s->img_n)
+		return stbi__err("bad SOS component count", "Corrupt JPEG");
+	if (Ls != 6 + 2 * z->scan_n)
+		return stbi__err("bad SOS len", "Corrupt JPEG");
+	for (k = 0; k < z->scan_n; ++k) {
+		int id = stbi__get8(z->s);
+		int tables = stbi__get8(z->s);
+		int which = 0;
+		// find the frame component carrying this id
+		while (which < z->s->img_n && z->img_comp[which].id != id)
+			++which;
+		if (which == z->s->img_n)
+			return 0; // no match
+		z->img_comp[which].hd = tables >> 4;
+		if (z->img_comp[which].hd > 3)
+			return stbi__err("bad DC huff", "Corrupt JPEG");
+		z->img_comp[which].ha = tables & 15;
+		if (z->img_comp[which].ha > 3)
+			return stbi__err("bad AC huff", "Corrupt JPEG");
+		z->order[k] = which;
+	}
+	z->spec_start = stbi__get8(z->s);
+	z->spec_end = stbi__get8(z->s); // should be 63, but might be 0
+	approx = stbi__get8(z->s);
+	z->succ_high = (approx >> 4);
+	z->succ_low = (approx & 15);
+	if (!z->progressive) {
+		// baseline: full spectrum, no successive approximation
+		if (z->spec_start != 0)
+			return stbi__err("bad SOS", "Corrupt JPEG");
+		if (z->succ_high != 0 || z->succ_low != 0)
+			return stbi__err("bad SOS", "Corrupt JPEG");
+		z->spec_end = 63;
+		return 1;
+	}
+	if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
+		return stbi__err("bad SOS", "Corrupt JPEG");
+	return 1;
+}

+// Release the buffers owned by the first `ncomp` components of `z`: the raw
+// pixel plane, the raw coefficient buffer and the line buffer. Each pointer
+// that was set is freed and cleared, together with the aligned alias derived
+// from it. Returns `why` unchanged so callers can free and return an error
+// code in one expression.
+static int stbi__free_jpeg_components(stbi__jpeg* z, int ncomp, int why) {
+	int c = 0;
+	while (c < ncomp) {
+		if (z->img_comp[c].raw_data != NULL) {
+			STBI_FREE(z->img_comp[c].raw_data);
+			z->img_comp[c].raw_data = NULL;
+			z->img_comp[c].data = NULL;
+		}
+		if (z->img_comp[c].raw_coeff != NULL) {
+			STBI_FREE(z->img_comp[c].raw_coeff);
+			z->img_comp[c].raw_coeff = NULL;
+			z->img_comp[c].coeff = NULL;
+		}
+		if (z->img_comp[c].linebuf != NULL) {
+			STBI_FREE(z->img_comp[c].linebuf);
+			z->img_comp[c].linebuf = NULL;
+		}
+		++c;
+	}
+	return why;
+}

+// Parse the frame header (SOF segment): sample precision, image size,
+// component count and, per component, id, sampling factors and quantization
+// table selector. When `scan` is not STBI__SCAN_load the function stops
+// after validating those fields. Otherwise it also derives the MCU layout and
+// allocates the per-component pixel plane (plus the coefficient buffer for
+// progressive images), each aligned to 16 bytes.
+// Returns 1 on success; on failure returns 0 after recording a reason via
+// stbi__err, and frees any component buffers allocated so far.
+static int stbi__process_frame_header(stbi__jpeg* z, int scan) {
+	stbi__context* s = z->s;
+	int Lf, p, i, q, h_max = 1, v_max = 1, c;
+	Lf = stbi__get16be(s);
+	if (Lf < 11)
+		return stbi__err("bad SOF len", "Corrupt JPEG"); // shortest legal header: one component
+	p = stbi__get8(s);
+	if (p != 8)
+		return stbi__err("only 8-bit",
+						 "JPEG format not supported: 8-bit only"); // JPEG baseline
+	s->img_y = stbi__get16be(s);
+	if (s->img_y == 0)
+		return stbi__err("no header height",
+						 "JPEG format not supported: delayed height"); // Legal, but we don't
+																	   // handle it--but
+																	   // neither does IJG
+	s->img_x = stbi__get16be(s);
+	if (s->img_x == 0)
+		return stbi__err("0 width", "Corrupt JPEG"); // JPEG requires a nonzero width
+	c = stbi__get8(s);
+	if (c != 3 && c != 1 && c != 4)
+		return stbi__err("bad component count", "Corrupt JPEG");
+	s->img_n = c;
+	for (i = 0; i < c; ++i) {
+		z->img_comp[i].data = NULL;
+		z->img_comp[i].linebuf = NULL;
+	}
+	if (Lf != 8 + 3 * s->img_n)
+		return stbi__err("bad SOF len", "Corrupt JPEG");
+	z->rgb = 0;
+	for (i = 0; i < s->img_n; ++i) {
+		static const unsigned char rgb[3] = {'R', 'G', 'B'};
+		z->img_comp[i].id = stbi__get8(s);
+		// count components whose ids spell 'R','G','B' in order
+		if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
+			++z->rgb;
+		q = stbi__get8(s);
+		z->img_comp[i].h = (q >> 4);
+		if (!z->img_comp[i].h || z->img_comp[i].h > 4)
+			return stbi__err("bad H", "Corrupt JPEG");
+		z->img_comp[i].v = q & 15;
+		if (!z->img_comp[i].v || z->img_comp[i].v > 4)
+			return stbi__err("bad V", "Corrupt JPEG");
+		z->img_comp[i].tq = stbi__get8(s);
+		if (z->img_comp[i].tq > 3)
+			return stbi__err("bad TQ", "Corrupt JPEG");
+	}
+	if (scan != STBI__SCAN_load)
+		return 1;
+	if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0))
+		return stbi__err("too large", "Image too large to decode");
+	for (i = 0; i < s->img_n; ++i) {
+		if (z->img_comp[i].h > h_max)
+			h_max = z->img_comp[i].h;
+		if (z->img_comp[i].v > v_max)
+			v_max = z->img_comp[i].v;
+	}
+	// Require every sampling factor to divide the maximum evenly. The
+	// plane sizes below, and presumably the upsampling stage elsewhere in
+	// this file, work with whole-number ratios only; a fractional ratio
+	// (e.g. 2 against a maximum of 3) would leave a plane narrower than
+	// the code scaling it back up expects. NOTE(review): confirm against
+	// the resampler setup in the image loader.
+	for (i = 0; i < s->img_n; ++i) {
+		if (h_max % z->img_comp[i].h != 0)
+			return stbi__err("bad H", "Corrupt JPEG");
+		if (v_max % z->img_comp[i].v != 0)
+			return stbi__err("bad V", "Corrupt JPEG");
+	}
+	// compute interleaved mcu info
+	z->img_h_max = h_max;
+	z->img_v_max = v_max;
+	z->img_mcu_w = h_max * 8;
+	z->img_mcu_h = v_max * 8;
+	// these sizes can't be more than 17 bits
+	z->img_mcu_x = (s->img_x + z->img_mcu_w - 1) / z->img_mcu_w;
+	z->img_mcu_y = (s->img_y + z->img_mcu_h - 1) / z->img_mcu_h;
+	for (i = 0; i < s->img_n; ++i) {
+		// number of effective pixels (e.g. for non-interleaved MCU)
+		z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max - 1) / h_max;
+		z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max - 1) / v_max;
+		// to simplify generation, we'll allocate enough memory to decode
+		// the bogus oversized data from using interleaved MCUs and their
+		// big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
+		// discard the extra data until colorspace conversion
+		//
+		// img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked
+		// earlier) so these muls can't overflow with 32-bit ints (which we
+		// require)
+		z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
+		z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
+		z->img_comp[i].coeff = 0;
+		z->img_comp[i].raw_coeff = 0;
+		z->img_comp[i].linebuf = NULL;
+		// 15 extra bytes leave room to round the pointer up to 16
+		z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
+		if (z->img_comp[i].raw_data == NULL)
+			return stbi__free_jpeg_components(z, i + 1, stbi__err("outofmem", "Out of memory"));
+		// align blocks for idct using mmx/sse
+		z->img_comp[i].data = (stbi_uc*)(((size_t)z->img_comp[i].raw_data + 15) & ~15);
+		if (z->progressive) {
+			// w2, h2 are multiples of 8 (see above)
+			z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
+			z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
+			z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
+			if (z->img_comp[i].raw_coeff == NULL)
+				return stbi__free_jpeg_components(z, i + 1, stbi__err("outofmem", "Out of memory"));
+			z->img_comp[i].coeff = (short*)(((size_t)z->img_comp[i].raw_coeff + 15) & ~15);
+		}
+	}
+	return 1;
+}

+// use comparisons since in some cases we handle more than one case (e.g. SOF)

+#define stbi__DNL(x) ((x) == 0xdc)

+#define stbi__SOI(x) ((x) == 0xd8)

+#define stbi__EOI(x) ((x) == 0xd9)

+#define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)

+#define stbi__SOS(x) ((x) == 0xda)

+#define stbi__SOF_progressive(x) ((x) == 0xc2)

+// Read the stream up to and including the frame header.
+// Expects SOI first; with scan == STBI__SCAN_type that alone is success.
+// Otherwise every marker segment before the first SOF is handed to
+// stbi__process_marker, non-marker bytes between segments are skipped, and
+// the SOF itself goes to stbi__process_frame_header. Returns 1 on success,
+// 0 on failure.
+static int stbi__decode_jpeg_header(stbi__jpeg* z, int scan) {
+	int m;
+	z->jfif = 0;
+	z->app14_color_transform = -1; // valid values are 0,1,2
+	z->marker = STBI__MARKER_none; // initialize cached marker to empty
+	m = stbi__get_marker(z);
+	if (!stbi__SOI(m))
+		return stbi__err("no SOI", "Corrupt JPEG");
+	if (scan == STBI__SCAN_type)
+		return 1;
+	for (m = stbi__get_marker(z); !stbi__SOF(m);) {
+		if (!stbi__process_marker(z, m))
+			return 0;
+		// some files have extra padding after their blocks, so keep
+		// scanning until a marker shows up or the data runs out
+		for (m = stbi__get_marker(z); m == STBI__MARKER_none; m = stbi__get_marker(z)) {
+			if (stbi__at_eof(z->s))
+				return stbi__err("no SOF", "Corrupt JPEG");
+		}
+	}
+	z->progressive = stbi__SOF_progressive(m);
+	return stbi__process_frame_header(z, scan) != 0;
+}

+// Decode the whole image into per-component planes (YCbCr format).
+// Parses the header, then walks the marker segments until EOI: SOS starts a
+// scan whose entropy-coded data is decoded, DNL is checked against the
+// header height, anything else goes to stbi__process_marker. Progressive
+// images are finished with stbi__jpeg_finish. Returns 1 on success, 0 on
+// failure.
+static int stbi__decode_jpeg_image(stbi__jpeg* j) {
+	int m, c;
+	for (c = 0; c < 4; c++) {
+		j->img_comp[c].raw_data = NULL;
+		j->img_comp[c].raw_coeff = NULL;
+	}
+	j->restart_interval = 0;
+	if (!stbi__decode_jpeg_header(j, STBI__SCAN_load))
+		return 0;
+	for (m = stbi__get_marker(j); !stbi__EOI(m); m = stbi__get_marker(j)) {
+		if (stbi__DNL(m)) {
+			int Ld = stbi__get16be(j->s);
+			stbi__uint32 NL = stbi__get16be(j->s);
+			if (Ld != 4)
+				return stbi__err("bad DNL len", "Corrupt JPEG");
+			if (NL != j->s->img_y)
+				return stbi__err("bad DNL height", "Corrupt JPEG");
+		} else if (!stbi__SOS(m)) {
+			if (!stbi__process_marker(j, m))
+				return 0;
+		} else {
+			if (!stbi__process_scan_header(j) || !stbi__parse_entropy_coded_data(j))
+				return 0;
+			if (j->marker != STBI__MARKER_none)
+				continue;
+			// handle 0s at the end of image data from IP Kamera 9060:
+			// skip ahead to the next 0xff and take the byte after it as
+			// the pending marker. if we reach eof without hitting one,
+			// the next stbi__get_marker() fails and we eventually return 0
+			while (!stbi__at_eof(j->s)) {
+				if (stbi__get8(j->s) != 255)
+					continue;
+				j->marker = stbi__get8(j->s);
+				break;
+			}
+		}
+	}
+	if (j->progressive)
+		stbi__jpeg_finish(j);
+	return 1;
+}

+// static jfif-centered resampling (across block boundaries)

+typedef stbi_uc* (*resample_row_func)(stbi_uc* out, stbi_uc* in0, stbi_uc* in1, int w, int hs);

+#define stbi__div4(x) ((stbi_uc)((x) >> 2))

+// Resampler for components that need no upsampling: hands back the near
+// input row untouched and ignores every other argument.
+static stbi_uc* resample_row_1(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs) {
+	STBI_NOTUSED(hs);
+	STBI_NOTUSED(w);
+	STBI_NOTUSED(in_far);
+	STBI_NOTUSED(out);
+	return in_near;
+}

+// Vertical 2x upsampling of one row: each of the `w` output samples is the
+// rounded blend (3*near + far) / 4 of the two input rows. Returns `out`.
+static stbi_uc* stbi__resample_row_v_2(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs) {
+	int col = 0;
+	STBI_NOTUSED(hs);
+	while (col < w) {
+		out[col] = stbi__div4(in_far[col] + 3 * in_near[col] + 2);
+		++col;
+	}
+	return out;
+}

+// Horizontal 2x upsampling of one row: writes two output samples for every
+// input sample of `in_near`. Interior outputs are the rounded blend
+// (3*this + neighbour) / 4; the first and last outputs copy the edge
+// samples. `in_far` and `hs` are unused. Returns `out`.
+static stbi_uc* stbi__resample_row_h_2(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs) {
+	int i;
+	stbi_uc* src = in_near;
+	STBI_NOTUSED(in_far);
+	STBI_NOTUSED(hs);
+	if (w == 1) {
+		// if only one sample, can't do any interpolation
+		out[0] = out[1] = src[0];
+		return out;
+	}
+	// left edge
+	out[0] = src[0];
+	out[1] = stbi__div4(src[0] * 3 + src[1] + 2);
+	// interior samples blend with the left and right neighbours
+	for (i = 1; i < w - 1; ++i) {
+		int weighted = 3 * src[i] + 2;
+		out[2 * i] = stbi__div4(weighted + src[i - 1]);
+		out[2 * i + 1] = stbi__div4(weighted + src[i + 1]);
+	}
+	// right edge (i is the index left behind by the loop)
+	out[2 * i] = stbi__div4(src[w - 2] * 3 + src[w - 1] + 2);
+	out[2 * i + 1] = src[w - 1];
+	return out;
+}

+#define stbi__div16(x) ((stbi_uc)((x) >> 4))

+// Combined 2x2 upsampling of one output row: every input column is first
+// blended vertically as 3*near + far, then neighbouring columns are blended
+// horizontally 3:1 and divided by 16 with rounding. The first and last output
+// samples use the vertical blend alone. `hs` is unused. Returns `out`.
+static stbi_uc* stbi__resample_row_hv_2(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs) {
+	int col, prev, cur;
+	STBI_NOTUSED(hs);
+	if (w == 1) {
+		out[0] = out[1] = stbi__div4(3 * in_near[0] + in_far[0] + 2);
+		return out;
+	}
+	cur = 3 * in_near[0] + in_far[0];
+	out[0] = stbi__div4(cur + 2);
+	col = 1;
+	while (col < w) {
+		prev = cur;
+		cur = 3 * in_near[col] + in_far[col];
+		out[col * 2 - 1] = stbi__div16(3 * prev + cur + 8);
+		out[col * 2] = stbi__div16(3 * cur + prev + 8);
+		++col;
+	}
+	out[w * 2 - 1] = stbi__div4(cur + 2);
+	return out;
+}

+#if defined(STBI_SSE2) || defined(STBI_NEON)

+static stbi_uc* stbi__resample_row_hv_2_simd(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs) { // SIMD variant of stbi__resample_row_hv_2: 8 input pixels per vector iteration, then a scalar tail

+	// need to generate 2x2 samples for every one in input

+	int i = 0, t0, t1; // t1: vertically filtered value (3*near + far) of the most recent column, t0: the one before it

+	if (w == 1) {

+		out[0] = out[1] = stbi__div4(3 * in_near[0] + in_far[0] + 2);

+		return out;

+	}

+	t1 = 3 * in_near[0] + in_far[0]; // doubles as the "previous" value for column 0, so the first even output reduces to (t1 + 2) >> 2

+	// process groups of 8 pixels for as long as we can.

+	// note we can't handle the last pixel in a row in this loop

+	// because we need to handle the filter boundary conditions.

+	for (; i < ((w - 1) & ~7); i += 8) { // this bound keeps the [i + 8] reads below at index <= w - 1

+#if defined(STBI_SSE2)

+		// load and perform the vertical filtering pass

+		// this uses 3*x + y = 4*x + (y - x)

+		__m128i zero = _mm_setzero_si128();

+		__m128i farb = _mm_loadl_epi64((__m128i*)(in_far + i));

+		__m128i nearb = _mm_loadl_epi64((__m128i*)(in_near + i));

+		__m128i farw = _mm_unpacklo_epi8(farb, zero);

+		__m128i nearw = _mm_unpacklo_epi8(nearb, zero);

+		__m128i diff = _mm_sub_epi16(farw, nearw);

+		__m128i nears = _mm_slli_epi16(nearw, 2);

+		__m128i curr = _mm_add_epi16(nears, diff); // current row

+		// horizontal filter works the same based on shifted vers of current

+		// row. "prev" is current row shifted right by 1 pixel; we need to

+		// insert the previous pixel value (from t1).

+		// "next" is current row shifted left by 1 pixel, with first pixel

+		// of next block of 8 pixels added in.

+		__m128i prv0 = _mm_slli_si128(curr, 2);

+		__m128i nxt0 = _mm_srli_si128(curr, 2);

+		__m128i prev = _mm_insert_epi16(prv0, t1, 0);

+		__m128i next = _mm_insert_epi16(nxt0, 3 * in_near[i + 8] + in_far[i + 8], 7);

+		// horizontal filter, polyphase implementation since it's convenient:

+		// even pixels = 3*cur + prev = cur*4 + (prev - cur)

+		// odd  pixels = 3*cur + next = cur*4 + (next - cur)

+		// note the shared term.

+		__m128i bias = _mm_set1_epi16(8);

+		__m128i curs = _mm_slli_epi16(curr, 2);

+		__m128i prvd = _mm_sub_epi16(prev, curr);

+		__m128i nxtd = _mm_sub_epi16(next, curr);

+		__m128i curb = _mm_add_epi16(curs, bias);

+		__m128i even = _mm_add_epi16(prvd, curb);

+		__m128i odd = _mm_add_epi16(nxtd, curb);

+		// interleave even and odd pixels, then undo scaling.

+		__m128i int0 = _mm_unpacklo_epi16(even, odd);

+		__m128i int1 = _mm_unpackhi_epi16(even, odd);

+		__m128i de0 = _mm_srli_epi16(int0, 4);

+		__m128i de1 = _mm_srli_epi16(int1, 4);

+		// pack and write output

+		__m128i outv = _mm_packus_epi16(de0, de1);

+		_mm_storeu_si128((__m128i*)(out + i * 2), outv); // 16 output bytes: out[2*i] .. out[2*i + 15]

+#elif defined(STBI_NEON)

+		// load and perform the vertical filtering pass

+		// this uses 3*x + y = 4*x + (y - x)

+		uint8x8_t farb = vld1_u8(in_far + i);

+		uint8x8_t nearb = vld1_u8(in_near + i);

+		int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));

+		int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));

+		int16x8_t curr = vaddq_s16(nears, diff); // current row

+		// horizontal filter works the same based on shifted vers of current

+		// row. "prev" is current row shifted right by 1 pixel; we need to

+		// insert the previous pixel value (from t1).

+		// "next" is current row shifted left by 1 pixel, with first pixel

+		// of next block of 8 pixels added in.

+		int16x8_t prv0 = vextq_s16(curr, curr, 7);

+		int16x8_t nxt0 = vextq_s16(curr, curr, 1);

+		int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);

+		int16x8_t next = vsetq_lane_s16(3 * in_near[i + 8] + in_far[i + 8], nxt0, 7);

+		// horizontal filter, polyphase implementation since it's convenient:

+		// even pixels = 3*cur + prev = cur*4 + (prev - cur)

+		// odd  pixels = 3*cur + next = cur*4 + (next - cur)

+		// note the shared term.

+		int16x8_t curs = vshlq_n_s16(curr, 2);

+		int16x8_t prvd = vsubq_s16(prev, curr);

+		int16x8_t nxtd = vsubq_s16(next, curr);

+		int16x8_t even = vaddq_s16(curs, prvd);

+		int16x8_t odd = vaddq_s16(curs, nxtd);

+		// undo scaling and round, then store with even/odd phases interleaved

+		uint8x8x2_t o;

+		o.val[0] = vqrshrun_n_s16(even, 4);

+		o.val[1] = vqrshrun_n_s16(odd, 4);

+		vst2_u8(out + i * 2, o);

+#endif

+		// "previous" value for next iter

+		t1 = 3 * in_near[i + 7] + in_far[i + 7];

+	}

+	t0 = t1;

+	t1 = 3 * in_near[i] + in_far[i];

+	out[i * 2] = stbi__div16(3 * t1 + t0 + 8); // first scalar column: only its even output is written here

+	for (++i; i < w; ++i) { // remaining columns use the same arithmetic as the scalar routine

+		t0 = t1;

+		t1 = 3 * in_near[i] + in_far[i];

+		out[i * 2 - 1] = stbi__div16(3 * t0 + t1 + 8);

+		out[i * 2] = stbi__div16(3 * t1 + t0 + 8);

+	}

+	out[w * 2 - 1] = stbi__div4(t1 + 2); // right edge: vertical blend only

+	STBI_NOTUSED(hs);

+	return out;

+}

+#endif

+// Nearest-neighbour horizontal upsampling: every one of the w input samples

+// in in_near is written hs times in a row to out. in_far is ignored.

+// Returns out.

+static stbi_uc* stbi__resample_row_generic(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs) {

+	int src, rep;

+	stbi_uc* dst = out;

+	STBI_NOTUSED(in_far);

+	for (src = 0; src < w; ++src) {

+		for (rep = 0; rep < hs; ++rep) {

+			*dst++ = in_near[src];

+		}

+	}

+	return out;

+}

+// this is a reduced-precision calculation of YCbCr-to-RGB introduced

+// to make sure the code produces the same results in both SIMD and scalar

+#define stbi__float2fixed(x) (((int)((x)*4096.0f + 0.5f)) << 8)

+// Scalar YCbCr -> RGB conversion for count pixels using 20-bit fixed point.

+// Each pixel writes out[0..3] = R, G, B, 255 and then out advances by step bytes.

+static void stbi__YCbCr_to_RGB_row(stbi_uc* out, const stbi_uc* y, const stbi_uc* pcb, const stbi_uc* pcr, int count, int step) {

+	int px;

+	for (px = 0; px < count; ++px, out += step) {

+		int luma = (y[px] << 20) + (1 << 19); // includes the rounding term

+		int cr = pcr[px] - 128;

+		int cb = pcb[px] - 128;

+		int r = luma + cr * stbi__float2fixed(1.40200f);

+		int g = luma + (cr * -stbi__float2fixed(0.71414f)) + ((cb * -stbi__float2fixed(0.34414f)) & 0xffff0000);

+		int b = luma + cb * stbi__float2fixed(1.77200f);

+		// drop the fractional bits (done on the int values, after the sums above)

+		r >>= 20;

+		g >>= 20;

+		b >>= 20;

+		// clamp to 0..255; the unsigned compare catches both directions at once

+		if ((unsigned)r > 255)

+			r = (r < 0) ? 0 : 255;

+		if ((unsigned)g > 255)

+			g = (g < 0) ? 0 : 255;

+		if ((unsigned)b > 255)

+			b = (b < 0) ? 0 : 255;

+		out[0] = (stbi_uc)r;

+		out[1] = (stbi_uc)g;

+		out[2] = (stbi_uc)b;

+		out[3] = 255;

+	}

+}

+#if defined(STBI_SSE2) || defined(STBI_NEON)

+static void stbi__YCbCr_to_RGB_simd(stbi_uc* out, stbi_uc const* y, stbi_uc const* pcb, stbi_uc const* pcr, int count, int step) { // vectorises only step == 4 (8 pixels per iteration); the scalar loop at the bottom handles everything else

+	int i = 0; // index of the next pixel to convert; shared by the vector loops and the scalar tail

+#ifdef STBI_SSE2

+	// step == 3 is pretty ugly on the final interleave, and i'm not convinced

+	// it's useful in practice (you wouldn't use it for textures, for example).

+	// so just accelerate step == 4 case.

+	if (step == 4) {

+		// this is a fairly straightforward implementation and not

+		// super-optimized.

+		__m128i signflip = _mm_set1_epi8(-0x80);

+		__m128i cr_const0 = _mm_set1_epi16((short)(1.40200f * 4096.0f + 0.5f));

+		__m128i cr_const1 = _mm_set1_epi16(-(short)(0.71414f * 4096.0f + 0.5f));

+		__m128i cb_const0 = _mm_set1_epi16(-(short)(0.34414f * 4096.0f + 0.5f));

+		__m128i cb_const1 = _mm_set1_epi16((short)(1.77200f * 4096.0f + 0.5f));

+		__m128i y_bias = _mm_set1_epi8((char)(unsigned char)128);

+		__m128i xw = _mm_set1_epi16(255); // alpha channel

+		for (; i + 7 < count; i += 8) { // runs only while 8 whole pixels remain

+			// load

+			__m128i y_bytes = _mm_loadl_epi64((__m128i*)(y + i));

+			__m128i cr_bytes = _mm_loadl_epi64((__m128i*)(pcr + i));

+			__m128i cb_bytes = _mm_loadl_epi64((__m128i*)(pcb + i));

+			__m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128

+			__m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128

+			// unpack to short (and left-shift cr, cb by 8)

+			__m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes);

+			__m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);

+			__m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);

+			// color transform

+			__m128i yws = _mm_srli_epi16(yw, 4);

+			__m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);

+			__m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);

+			__m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);

+			__m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);

+			__m128i rws = _mm_add_epi16(cr0, yws);

+			__m128i gwt = _mm_add_epi16(cb0, yws);

+			__m128i bws = _mm_add_epi16(yws, cb1);

+			__m128i gws = _mm_add_epi16(gwt, cr1);

+			// descale

+			__m128i rw = _mm_srai_epi16(rws, 4);

+			__m128i bw = _mm_srai_epi16(bws, 4);

+			__m128i gw = _mm_srai_epi16(gws, 4);

+			// back to byte, set up for transpose

+			__m128i brb = _mm_packus_epi16(rw, bw);

+			__m128i gxb = _mm_packus_epi16(gw, xw);

+			// transpose to interleave channels

+			__m128i t0 = _mm_unpacklo_epi8(brb, gxb);

+			__m128i t1 = _mm_unpackhi_epi8(brb, gxb);

+			__m128i o0 = _mm_unpacklo_epi16(t0, t1);

+			__m128i o1 = _mm_unpackhi_epi16(t0, t1);

+			// store

+			_mm_storeu_si128((__m128i*)(out + 0), o0);

+			_mm_storeu_si128((__m128i*)(out + 16), o1);

+			out += 32; // 8 pixels * 4 bytes each

+		}

+	}

+#endif

+#ifdef STBI_NEON

+	// in this version, step=3 support would be easy to add. but is there

+	// demand?

+	if (step == 4) {

+		// this is a fairly straightforward implementation and not

+		// super-optimized.

+		uint8x8_t signflip = vdup_n_u8(0x80);

+		int16x8_t cr_const0 = vdupq_n_s16((short)(1.40200f * 4096.0f + 0.5f));

+		int16x8_t cr_const1 = vdupq_n_s16(-(short)(0.71414f * 4096.0f + 0.5f));

+		int16x8_t cb_const0 = vdupq_n_s16(-(short)(0.34414f * 4096.0f + 0.5f));

+		int16x8_t cb_const1 = vdupq_n_s16((short)(1.77200f * 4096.0f + 0.5f));

+		for (; i + 7 < count; i += 8) { // runs only while 8 whole pixels remain

+			// load

+			uint8x8_t y_bytes = vld1_u8(y + i);

+			uint8x8_t cr_bytes = vld1_u8(pcr + i);

+			uint8x8_t cb_bytes = vld1_u8(pcb + i);

+			int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));

+			int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));

+			// expand to s16

+			int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));

+			int16x8_t crw = vshll_n_s8(cr_biased, 7);

+			int16x8_t cbw = vshll_n_s8(cb_biased, 7);

+			// color transform

+			int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);

+			int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);

+			int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);

+			int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);

+			int16x8_t rws = vaddq_s16(yws, cr0);

+			int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);

+			int16x8_t bws = vaddq_s16(yws, cb1);

+			// undo scaling, round, convert to byte

+			uint8x8x4_t o;

+			o.val[0] = vqrshrun_n_s16(rws, 4);

+			o.val[1] = vqrshrun_n_s16(gws, 4);

+			o.val[2] = vqrshrun_n_s16(bws, 4);

+			o.val[3] = vdup_n_u8(255);

+			// store, interleaving r/g/b/a

+			vst4_u8(out, o);

+			out += 8 * 4;

+		}

+	}

+#endif

+	for (; i < count; ++i) { // scalar path: leftover pixels, or all of them when step != 4

+		int y_fixed = (y[i] << 20) + (1 << 19); // rounding

+		int r, g, b;

+		int cr = pcr[i] - 128;

+		int cb = pcb[i] - 128;

+		r = y_fixed + cr * stbi__float2fixed(1.40200f);

+		g = y_fixed + cr * -stbi__float2fixed(0.71414f) + ((cb * -stbi__float2fixed(0.34414f)) & 0xffff0000);

+		b = y_fixed + cb * stbi__float2fixed(1.77200f);

+		r >>= 20;

+		g >>= 20;

+		b >>= 20;

+		if ((unsigned)r > 255) { // one unsigned compare detects both < 0 and > 255

+			if (r < 0)

+				r = 0;

+			else

+				r = 255;

+		}

+		if ((unsigned)g > 255) {

+			if (g < 0)

+				g = 0;

+			else

+				g = 255;

+		}

+		if ((unsigned)b > 255) {

+			if (b < 0)

+				b = 0;

+			else

+				b = 255;

+		}

+		out[0] = (stbi_uc)r;

+		out[1] = (stbi_uc)g;

+		out[2] = (stbi_uc)b;

+		out[3] = 255;

+		out += step;

+	}

+}

+#endif

+// set up the kernels

+// Installs the three per-decoder kernels (IDCT, YCbCr->RGB, 2x2 resampler) in j.

+// The scalar versions are the default; SIMD versions replace them when compiled in

+// (and, for SSE2, when stbi__sse2_available() reports support).

+static void stbi__setup_jpeg(stbi__jpeg* j) {

+	// portable defaults

+	j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;

+	j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;

+	j->idct_block_kernel = stbi__idct_block;

+#ifdef STBI_SSE2

+	if (stbi__sse2_available()) {

+		j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;

+		j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;

+		j->idct_block_kernel = stbi__idct_simd;

+	}

+#endif

+#ifdef STBI_NEON

+	// no runtime check on this path

+	j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;

+	j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;

+	j->idct_block_kernel = stbi__idct_simd;

+#endif

+}

+// clean up the temporary component buffers

+static void stbi__cleanup_jpeg(stbi__jpeg* j) {

+	// release the buffers of all img_n components; the last argument (why) is 0

+	int ncomp = j->s->img_n;

+	stbi__free_jpeg_components(j, ncomp, 0);

+}

+// Per-component upsampling state used while load_jpeg_image walks the output rows.

+typedef struct {

+	resample_row_func resample; // row upsampler selected from hs/vs

+	stbi_uc* line0;             // source row handed to the resampler together with line1

+	stbi_uc* line1;             // source row that advances through the component data

+	int hs;                     // horizontal expansion factor

+	int vs;                     // vertical expansion factor

+	int w_lores;                // horizontal pixels pre-expansion

+	int ystep;                  // how far through vertical expansion we are

+	int ypos;                   // which pre-expansion row we're on

+} stbi__resample;

+// fast 0..255 * 0..255 => 0..255 rounded multiplication

+// Rounded (x * y) / 255 for 8-bit operands, computed without a division.

+static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y) {

+	unsigned int prod = x * y + 128;

+	prod += prod >> 8;

+	return (stbi_uc)(prod >> 8);

+}

+static stbi_uc* load_jpeg_image(stbi__jpeg* z, int* out_x, int* out_y, int* comp, int req_comp) { // decodes via stbi__decode_jpeg_image, then upsamples each plane and converts to n interleaved bytes per pixel

+	int n, decode_n, is_rgb; // n: output components per pixel; decode_n: planes that must be resampled; is_rgb: planes are already RGB

+	z->s->img_n = 0; // make stbi__cleanup_jpeg safe

+	// validate req_comp

+	if (req_comp < 0 || req_comp > 4)

+		return stbi__errpuc("bad req_comp", "Internal error");

+	// load a jpeg image from whichever source, but leave in YCbCr format

+	if (!stbi__decode_jpeg_image(z)) {

+		stbi__cleanup_jpeg(z);

+		return NULL;

+	}

+	// determine actual number of components to generate

+	n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1; // req_comp == 0 selects 3 for files with >= 3 components, else 1

+	is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));

+	if (z->s->img_n == 3 && n < 3 && !is_rgb)

+		decode_n = 1; // grey output from non-RGB 3-plane data only reads plane 0 below

+	else

+		decode_n = z->s->img_n;

+	// resample and color-convert

+	{

+		int k;

+		unsigned int i, j;

+		stbi_uc* output;

+		stbi_uc* coutput[4] = {NULL, NULL, NULL, NULL}; // per-plane pointer to the current upsampled row

+		stbi__resample res_comp[4];

+		for (k = 0; k < decode_n; ++k) {

+			stbi__resample* r = &res_comp[k];

+			// allocate line buffer big enough for upsampling off the edges

+			// with upsample factor of 4

+			z->img_comp[k].linebuf = (stbi_uc*)stbi__malloc(z->s->img_x + 3);

+			if (!z->img_comp[k].linebuf) {

+				stbi__cleanup_jpeg(z);

+				return stbi__errpuc("outofmem", "Out of memory");

+			}

+			r->hs = z->img_h_max / z->img_comp[k].h;

+			r->vs = z->img_v_max / z->img_comp[k].v;

+			r->ystep = r->vs >> 1;

+			r->w_lores = (z->s->img_x + r->hs - 1) / r->hs; // img_x / hs, rounded up

+			r->ypos = 0;

+			r->line0 = r->line1 = z->img_comp[k].data;

+			if (r->hs == 1 && r->vs == 1)

+				r->resample = resample_row_1;

+			else if (r->hs == 1 && r->vs == 2)

+				r->resample = stbi__resample_row_v_2;

+			else if (r->hs == 2 && r->vs == 1)

+				r->resample = stbi__resample_row_h_2;

+			else if (r->hs == 2 && r->vs == 2)

+				r->resample = z->resample_row_hv_2_kernel;

+			else

+				r->resample = stbi__resample_row_generic;

+		}

+		// allocate the interleaved output image; this is the last failure exit in this function

+		output = (stbi_uc*)stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);

+		if (!output) {

+			stbi__cleanup_jpeg(z);

+			return stbi__errpuc("outofmem", "Out of memory");

+		}

+		// now go ahead and resample

+		for (j = 0; j < z->s->img_y; ++j) {

+			stbi_uc* out = output + n * z->s->img_x * j; // start of output row j

+			for (k = 0; k < decode_n; ++k) {

+				stbi__resample* r = &res_comp[k];

+				int y_bot = r->ystep >= (r->vs >> 1); // selects which of line0/line1 is passed as in_near and which as in_far

+				coutput[k] = r->resample(z->img_comp[k].linebuf, y_bot ? r->line1 : r->line0, y_bot ? r->line0 : r->line1, r->w_lores, r->hs);

+				if (++r->ystep >= r->vs) { // vs output rows produced: move on to the next source row

+					r->ystep = 0;

+					r->line0 = r->line1;

+					if (++r->ypos < z->img_comp[k].y) // line1 stops advancing once ypos reaches the component's y count

+						r->line1 += z->img_comp[k].w2;

+				}

+			}

+			if (n >= 3) {

+				stbi_uc* y = coutput[0];

+				if (z->s->img_n == 3) {

+					if (is_rgb) {

+						for (i = 0; i < z->s->img_x; ++i) {

+							out[0] = y[i];

+							out[1] = coutput[1][i];

+							out[2] = coutput[2][i];

+							out[3] = 255;

+							out += n;

+						}

+					} else {

+						z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);

+					}

+				} else if (z->s->img_n == 4) {

+					if (z->app14_color_transform == 0) { // CMYK

+						for (i = 0; i < z->s->img_x; ++i) {

+							stbi_uc m = coutput[3][i];

+							out[0] = stbi__blinn_8x8(coutput[0][i], m);

+							out[1] = stbi__blinn_8x8(coutput[1][i], m);

+							out[2] = stbi__blinn_8x8(coutput[2][i], m);

+							out[3] = 255;

+							out += n;

+						}

+					} else if (z->app14_color_transform == 2) { // YCCK

+						z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);

+						for (i = 0; i < z->s->img_x; ++i) {

+							stbi_uc m = coutput[3][i];

+							out[0] = stbi__blinn_8x8(255 - out[0], m);

+							out[1] = stbi__blinn_8x8(255 - out[1], m);

+							out[2] = stbi__blinn_8x8(255 - out[2], m);

+							out += n;

+						}

+					} else { // YCbCr + alpha?  Ignore the fourth channel for

+							 // now

+						z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);

+					}

+				} else

+					for (i = 0; i < z->s->img_x; ++i) {

+						out[0] = out[1] = out[2] = y[i];

+						out[3] = 255; // not used if n==3

+						out += n;

+					}

+			} else {

+				if (is_rgb) {

+					if (n == 1)

+						for (i = 0; i < z->s->img_x; ++i)

+							*out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);

+					else {

+						for (i = 0; i < z->s->img_x; ++i, out += 2) {

+							out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);

+							out[1] = 255;

+						}

+					}

+				} else if (z->s->img_n == 4 && z->app14_color_transform == 0) {

+					for (i = 0; i < z->s->img_x; ++i) {

+						stbi_uc m = coutput[3][i];

+						stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);

+						stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);

+						stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);

+						out[0] = stbi__compute_y(r, g, b);

+						out[1] = 255;

+						out += n;

+					}

+				} else if (z->s->img_n == 4 && z->app14_color_transform == 2) {

+					for (i = 0; i < z->s->img_x; ++i) {

+						out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);

+						out[1] = 255;

+						out += n;

+					}

+				} else {

+					stbi_uc* y = coutput[0];

+					if (n == 1)

+						for (i = 0; i < z->s->img_x; ++i)

+							out[i] = y[i];

+					else

+						for (i = 0; i < z->s->img_x; ++i) {

+							*out++ = y[i];

+							*out++ = 255;

+						}

+				}

+			}

+		}

+		stbi__cleanup_jpeg(z);

+		*out_x = z->s->img_x;

+		*out_y = z->s->img_y;

+		if (comp)

+			*comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output

+		return output;

+	}

+}

+// Loader entry point for JPEG: allocates a temporary decoder state, decodes the

+// image from s via load_jpeg_image, frees the state and returns the pixel buffer.

+// x, y, comp and req_comp are forwarded to load_jpeg_image; ri is unused.

+// If the decoder state cannot be allocated, returns the result of

+// stbi__errpuc("outofmem", ...) instead of dereferencing a NULL pointer.

+static void* stbi__jpeg_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri) {

+	unsigned char* result;

+	stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));

+	STBI_NOTUSED(ri);

+	if (!j)

+		return stbi__errpuc("outofmem", "Out of memory");

+	j->s = s;

+	stbi__setup_jpeg(j);

+	result = load_jpeg_image(j, x, y, comp, req_comp);

+	STBI_FREE(j);

+	return result;

+}

+// Probes whether s holds a JPEG: runs the header decoder in STBI__SCAN_type mode,

+// rewinds the stream and returns the decoder's result.

+// If the temporary decoder state cannot be allocated, returns the result of

+// stbi__err("outofmem", ...) instead of dereferencing a NULL pointer.

+static int stbi__jpeg_test(stbi__context* s) {

+	int r;

+	stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));

+	if (!j)

+		return stbi__err("outofmem", "Out of memory");

+	j->s = s;

+	stbi__setup_jpeg(j);

+	r = stbi__decode_jpeg_header(j, STBI__SCAN_type);

+	stbi__rewind(s);

+	STBI_FREE(j);

+	return r;

+}

+// Reads only the JPEG header through j and reports width, height and component

+// count (3 for files with >= 3 components, otherwise 1) through whichever of

+// x, y, comp are non-NULL. Returns 1 on success; on failure rewinds the stream

+// and returns 0.

+static int stbi__jpeg_info_raw(stbi__jpeg* j, int* x, int* y, int* comp) {

+	if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) {

+		stbi__rewind(j->s);

+		return 0;

+	}

+	if (x) {

+		*x = j->s->img_x;

+	}

+	if (y) {

+		*y = j->s->img_y;

+	}

+	if (comp) {

+		if (j->s->img_n >= 3)

+			*comp = 3;

+		else

+			*comp = 1;

+	}

+	return 1;

+}

+// Reports JPEG dimensions/component count for the stream s by running

+// stbi__jpeg_info_raw on a temporary decoder state; returns its result.

+// If that state cannot be allocated, returns the result of

+// stbi__err("outofmem", ...) instead of dereferencing a NULL pointer.

+static int stbi__jpeg_info(stbi__context* s, int* x, int* y, int* comp) {

+	int result;

+	stbi__jpeg* j = (stbi__jpeg*)(stbi__malloc(sizeof(stbi__jpeg)));

+	if (!j)

+		return stbi__err("outofmem", "Out of memory");

+	j->s = s;

+	result = stbi__jpeg_info_raw(j, x, y, comp);

+	STBI_FREE(j);

+	return result;

+}

+#endif

+// public domain zlib decode    v0.2  Sean Barrett 2006-11-18

+//    simple implementation

+//      - all input must be provided in an upfront buffer

+//      - all output is written to a single output buffer (can malloc/realloc)

+//    performance

+//      - fast huffman

+#ifndef STBI_NO_ZLIB

+// fast-way is faster to check than jpeg huffman, but slow way is slower

+#define STBI__ZFAST_BITS 9 // accelerate all cases in default tables

+#define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1)

+// zlib-style huffman encoding

+// (jpegs packs from left, zlib from right, so can't share code)

+typedef struct {

+	stbi__uint16 fast[1 << STBI__ZFAST_BITS]; // indexed by the low STBI__ZFAST_BITS input bits: (code length << 9) | symbol, or 0 when the slow path is needed

+	stbi__uint16 firstcode[16]; // first code value assigned to each code length

+	int maxcode[17]; // per length: one past the last code, left-aligned to 16 bits; [16] is a sentinel

+	stbi__uint16 firstsymbol[16]; // index into size[]/value[] of the first symbol of each code length

+	stbi_uc size[288]; // code length of each entry, in code order

+	stbi__uint16 value[288]; // symbol of each entry, in code order

+} stbi__zhuffman;

+// Reverses the order of the low 16 bits of n by swapping progressively larger

+// groups: single bits, pairs, nibbles, then bytes.

+stbi_inline static int stbi__bitreverse16(int n) {

+	int upper, lower;

+	upper = (n & 0xAAAA) >> 1;

+	lower = (n & 0x5555) << 1;

+	n = upper | lower;

+	upper = (n & 0xCCCC) >> 2;

+	lower = (n & 0x3333) << 2;

+	n = upper | lower;

+	upper = (n & 0xF0F0) >> 4;

+	lower = (n & 0x0F0F) << 4;

+	n = upper | lower;

+	upper = (n & 0xFF00) >> 8;

+	lower = (n & 0x00FF) << 8;

+	return upper | lower;

+}

+// Reverses the low `bits` bits of v (bits must be <= 16).

+stbi_inline static int stbi__bit_reverse(int v, int bits) {

+	int flipped;

+	STBI_ASSERT(bits <= 16);

+	// reverse all 16 bits, then drop the (16 - bits) positions that are not wanted,

+	// e.g. for 11 bits: reverse 16 and shift away 5

+	flipped = stbi__bitreverse16(v);

+	return flipped >> (16 - bits);

+}

+// Builds the decoding tables in z from num code lengths in sizelist.

+// Returns 1 on success; returns the result of stbi__err(...) when the lengths

+// cannot form a valid code set.

+static int stbi__zbuild_huffman(stbi__zhuffman* z, const stbi_uc* sizelist, int num) {

+	int idx, len;

+	int symbols_so_far = 0;

+	int code = 0;

+	int next_code[16], sizes[17];

+	// histogram of code lengths (DEFLATE spec for generating codes)

+	memset(sizes, 0, sizeof(sizes));

+	memset(z->fast, 0, sizeof(z->fast));

+	for (idx = 0; idx < num; ++idx) {

+		++sizes[sizelist[idx]];

+	}

+	sizes[0] = 0;

+	for (len = 1; len < 16; ++len) {

+		if (sizes[len] > (1 << len)) {

+			return stbi__err("bad sizes", "Corrupt PNG");

+		}

+	}

+	// assign the first code and first symbol slot of every length

+	for (len = 1; len < 16; ++len) {

+		next_code[len] = code;

+		z->firstcode[len] = (stbi__uint16)code;

+		z->firstsymbol[len] = (stbi__uint16)symbols_so_far;

+		code += sizes[len];

+		if (sizes[len] && code - 1 >= (1 << len)) {

+			return stbi__err("bad codelengths", "Corrupt PNG");

+		}

+		z->maxcode[len] = code << (16 - len); // preshift for inner loop

+		code <<= 1;

+		symbols_so_far += sizes[len];

+	}

+	z->maxcode[16] = 0x10000; // sentinel

+	// fill the per-symbol tables and the fast lookup table

+	for (idx = 0; idx < num; ++idx) {

+		int s = sizelist[idx];

+		int slot, j;

+		stbi__uint16 fastv;

+		if (!s) {

+			continue;

+		}

+		slot = next_code[s] - z->firstcode[s] + z->firstsymbol[s];

+		fastv = (stbi__uint16)((s << 9) | idx);

+		z->size[slot] = (stbi_uc)s;

+		z->value[slot] = (stbi__uint16)idx;

+		if (s <= STBI__ZFAST_BITS) {

+			// every fast-table index whose low s bits equal the reversed code

+			for (j = stbi__bit_reverse(next_code[s], s); j < (1 << STBI__ZFAST_BITS); j += (1 << s)) {

+				z->fast[j] = fastv;

+			}

+		}

+		++next_code[s];

+	}

+	return 1;

+}

+// zlib-from-memory implementation for PNG reading

+//    because PNG allows splitting the zlib stream arbitrarily,

+//    and it's annoying structurally to have PNG call ZLIB call PNG,

+//    we require PNG read all the IDATs and combine them into a single

+//    memory buffer

+typedef struct {

+	stbi_uc *zbuffer, *zbuffer_end; // next unread input byte and one past the last input byte

+	int num_bits; // number of valid bits currently held in code_buffer

+	stbi__uint32 code_buffer; // bit reservoir; new bytes are OR-ed in above the existing bits

+	char* zout; // next output byte to write

+	char* zout_start; // start of the output buffer

+	char* zout_end; // one past the end of the output buffer

+	int z_expandable; // non-zero if stbi__zexpand may reallocate the output buffer

+	stbi__zhuffman z_length, z_distance; // tables for literal/length symbols and for distance symbols

+} stbi__zbuf;

+// Returns the next input byte and advances, or 0 once the input is exhausted.

+stbi_inline static stbi_uc stbi__zget8(stbi__zbuf* z) {

+	return (z->zbuffer < z->zbuffer_end) ? *z->zbuffer++ : 0;

+}

+// Tops up the bit reservoir one input byte at a time until it holds more than

+// 24 bits. At least one byte is always added.

+static void stbi__fill_bits(stbi__zbuf* z) {

+	for (;;) {

+		STBI_ASSERT(z->code_buffer < (1U << z->num_bits));

+		z->code_buffer |= (unsigned int)stbi__zget8(z) << z->num_bits;

+		z->num_bits += 8;

+		if (z->num_bits > 24) {

+			break;

+		}

+	}

+}

+// Removes the next n bits from the bit reservoir (refilling it first when it

+// holds fewer than n) and returns them.

+stbi_inline static unsigned int stbi__zreceive(stbi__zbuf* z, int n) {

+	unsigned int bits;

+	if (z->num_bits < n) {

+		stbi__fill_bits(z);

+	}

+	bits = z->code_buffer & ((1 << n) - 1);

+	z->num_bits -= n;

+	z->code_buffer >>= n;

+	return bits;

+}

+// Decodes one symbol whose code was not resolved by the fast table.

+// Returns the symbol, or -1 when the bits do not correspond to a valid code.

+// The computed table index is validated at run time, so corrupt input cannot

+// index outside z->size / z->value when STBI_ASSERT is compiled out.

+static int stbi__zhuffman_decode_slowpath(stbi__zbuf* a, stbi__zhuffman* z) {

+	int b, s, k;

+	// not resolved by fast table, so compute it the slow way

+	// use jpeg approach, which requires MSbits at top

+	k = stbi__bit_reverse(a->code_buffer, 16);

+	// maxcode[16] is a sentinel larger than any 16-bit k, so this loop ends with s <= 16

+	for (s = STBI__ZFAST_BITS + 1;; ++s)

+		if (k < z->maxcode[s])

+			break;

+	if (s >= 16)

+		return -1; // invalid code!

+	// code size is s, so:

+	b = (k >> (16 - s)) - z->firstcode[s] + z->firstsymbol[s];

+	if (b < 0 || b >= (int)sizeof(z->size))

+		return -1; // index outside the symbol tables: the stream is corrupt

+	if (z->size[b] != s)

+		return -1; // was an assert; report failure instead

+	a->code_buffer >>= s;

+	a->num_bits -= s;

+	return z->value[b];

+}

+// Decodes one Huffman symbol from the input. The fast table is tried first on

+// the low STBI__ZFAST_BITS bits; a zero entry falls through to the slow path.

+stbi_inline static int stbi__zhuffman_decode(stbi__zbuf* a, stbi__zhuffman* z) {

+	int entry, nbits;

+	if (a->num_bits < 16) {

+		stbi__fill_bits(a);

+	}

+	entry = z->fast[a->code_buffer & STBI__ZFAST_MASK];

+	if (!entry) {

+		return stbi__zhuffman_decode_slowpath(a, z);

+	}

+	// entry layout: code length in the bits above bit 8, symbol in the low 9 bits

+	nbits = entry >> 9;

+	a->code_buffer >>= nbits;

+	a->num_bits -= nbits;

+	return entry & 511;

+}

+// Grows the output buffer (by repeated doubling) until n more bytes fit after

+// zout, and updates z->zout_start / z->zout / z->zout_end to the new storage.

+// Returns 1 on success. Returns the result of stbi__err(...) when the buffer

+// is not expandable, when the required size does not fit in an unsigned int,

+// or when reallocation fails; on reallocation failure the old buffer is left

+// in place.

+static int stbi__zexpand(stbi__zbuf* z, char* zout,

+						 int n) // need to make room for n bytes

+{

+	char* q;

+	unsigned int cur, limit, old_limit;

+	const unsigned int max_size = ~0u; // largest representable buffer size

+	z->zout = zout;

+	if (!z->z_expandable)

+		return stbi__err("output buffer limit", "Corrupt PNG");

+	cur = (unsigned int)(z->zout - z->zout_start);

+	limit = old_limit = (unsigned int)(z->zout_end - z->zout_start);

+	// cur + n must not wrap around

+	if (n < 0 || max_size - cur < (unsigned int)n)

+		return stbi__err("outofmem", "Out of memory");

+	while (cur + (unsigned int)n > limit) {

+		if (limit > max_size / 2)

+			return stbi__err("outofmem", "Out of memory");

+		// doubling a zero-sized buffer would never terminate, so start from 1

+		limit = limit ? limit * 2 : 1;

+	}

+	q = (char*)STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);

+	STBI_NOTUSED(old_limit);

+	if (q == NULL)

+		return stbi__err("outofmem", "Out of memory");

+	z->zout_start = q;

+	z->zout = q + cur;

+	z->zout_end = q + limit;

+	return 1;

+}

+static const int stbi__zlength_base[31] = {3,  4,  5,  6,  7,  8,  9,  10,  11,  13,  15,  17,  19,  23, 27, 31, // base match length, indexed by (length symbol - 257)

+										   35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0,  0}; // the last two entries are 0 placeholders

+static const int stbi__zlength_extra[31] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0}; // extra bits added to the base length, same indexing

+static const int stbi__zdist_base[32] = {1,   2,   3,   4,   5,	7,	9,	13,   17,   25,   33,   49,	65,	97,	129, 193, // base match distance, indexed by distance symbol

+										 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0,   0}; // the last two entries are 0 placeholders

+static const int stbi__zdist_extra[32] = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}; // extra bits added to the base distance; 30 initialisers, the remaining two default to 0

+// Decodes one Huffman-coded DEFLATE block into the output buffer using

+// a->z_length and a->z_distance. Returns 1 when the end-of-block symbol (256)

+// is reached, otherwise 0 / the result of stbi__err(...) on corrupt data or

+// when the output cannot be expanded.

+// Length symbols 286/287 and distance symbols 30/31 are rejected: DEFLATE

+// (RFC 1951) says they never occur in compressed data, and their table entries

+// here are 0 placeholders (a distance of 0 would copy not-yet-written output).

+static int stbi__parse_huffman_block(stbi__zbuf* a) {

+	char* zout = a->zout;

+	for (;;) {

+		int z = stbi__zhuffman_decode(a, &a->z_length);

+		if (z < 256) {

+			if (z < 0)

+				return stbi__err("bad huffman code",

+								 "Corrupt PNG"); // error in huffman codes

+			if (zout >= a->zout_end) {

+				if (!stbi__zexpand(a, zout, 1))

+					return 0;

+				zout = a->zout;

+			}

+			*zout++ = (char)z;

+		} else {

+			stbi_uc* p;

+			int len, dist;

+			if (z == 256) {

+				a->zout = zout;

+				return 1;

+			}

+			if (z >= 286)

+				return stbi__err("bad huffman code", "Corrupt PNG"); // reserved length symbols

+			z -= 257;

+			len = stbi__zlength_base[z];

+			if (stbi__zlength_extra[z])

+				len += stbi__zreceive(a, stbi__zlength_extra[z]);

+			z = stbi__zhuffman_decode(a, &a->z_distance);

+			if (z < 0 || z >= 30)

+				return stbi__err("bad huffman code", "Corrupt PNG"); // invalid or reserved distance symbol

+			dist = stbi__zdist_base[z];

+			if (stbi__zdist_extra[z])

+				dist += stbi__zreceive(a, stbi__zdist_extra[z]);

+			if (zout - a->zout_start < dist)

+				return stbi__err("bad dist", "Corrupt PNG");

+			// compare sizes rather than forming zout + len, which could point past the buffer

+			if (len > a->zout_end - zout) {

+				if (!stbi__zexpand(a, zout, len))

+					return 0;

+				zout = a->zout;

+			}

+			p = (stbi_uc*)(zout - dist);

+			if (dist == 1) { // run of one byte; common in images.

+				stbi_uc v = *p;

+				if (len) {

+					do

+						*zout++ = v;

+					while (--len);

+				}

+			} else {

+				// byte-by-byte on purpose: source and destination may overlap

+				if (len) {

+					do

+						*zout++ = *p++;

+					while (--len);

+				}

+			}

+		}

+	}

+}

+// Reads the code-length description of a dynamic-Huffman block and builds

+// a->z_length and a->z_distance from it. Returns 1 on success, otherwise 0 /

+// the result of stbi__err(...).

+static int stbi__compute_huffman_codes(stbi__zbuf* a) {

+	static const stbi_uc length_dezigzag[19] = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};

+	stbi__zhuffman z_codelength;

+	stbi_uc lencodes[286 + 32 + 137]; // padding for maximum single op

+	stbi_uc codelength_sizes[19];

+	int i, n;

+	// header fields, read in stream order

+	int hlit = stbi__zreceive(a, 5) + 257;

+	int hdist = stbi__zreceive(a, 5) + 1;

+	int hclen = stbi__zreceive(a, 4) + 4;

+	int ntot = hlit + hdist;

+	// lengths of the code-length alphabet arrive in a permuted order

+	memset(codelength_sizes, 0, sizeof(codelength_sizes));

+	for (i = 0; i < hclen; ++i) {

+		codelength_sizes[length_dezigzag[i]] = (stbi_uc)stbi__zreceive(a, 3);

+	}

+	if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) {

+		return 0;

+	}

+	// decode hlit + hdist code lengths; symbols 16..18 are run-length repeats

+	n = 0;

+	while (n < ntot) {

+		int sym = stbi__zhuffman_decode(a, &z_codelength);

+		int run;

+		stbi_uc fill = 0;

+		if (sym < 0 || sym >= 19) {

+			return stbi__err("bad codelengths", "Corrupt PNG");

+		}

+		if (sym < 16) {

+			lencodes[n++] = (stbi_uc)sym;

+			continue;

+		}

+		if (sym == 16) {

+			// repeat the previous length

+			run = stbi__zreceive(a, 2) + 3;

+			if (n == 0) {

+				return stbi__err("bad codelengths", "Corrupt PNG");

+			}

+			fill = lencodes[n - 1];

+		} else if (sym == 17) {

+			// short run of zeros

+			run = stbi__zreceive(a, 3) + 3;

+		} else {

+			// long run of zeros

+			STBI_ASSERT(sym == 18);

+			run = stbi__zreceive(a, 7) + 11;

+		}

+		if (ntot - n < run) {

+			return stbi__err("bad codelengths", "Corrupt PNG");

+		}

+		memset(lencodes + n, fill, run);

+		n += run;

+	}

+	if (n != ntot) {

+		return stbi__err("bad codelengths", "Corrupt PNG");

+	}

+	if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) {

+		return 0;

+	}

+	if (!stbi__zbuild_huffman(&a->z_distance, lencodes + hlit, hdist)) {

+		return 0;

+	}

+	return 1;

+}

+// Copies one stored (uncompressed) DEFLATE block to the output.

+// Discards bits up to the next byte boundary, reads the 4-byte LEN/NLEN header

+// (partly from the bit reservoir, the rest from the input), validates it and

+// copies LEN bytes. Returns 1 on success, otherwise 0 / the result of

+// stbi__err(...).

+// The bounds checks compare LEN against the remaining space instead of forming

+// pointer + LEN, which could point beyond the buffers.

+static int stbi__parse_uncompressed_block(stbi__zbuf* a) {

+	stbi_uc header[4];

+	int len, nlen, k;

+	if (a->num_bits & 7)

+		stbi__zreceive(a, a->num_bits & 7); // discard

+	// drain the bit-packed data into header

+	k = 0;

+	while (a->num_bits > 0) {

+		header[k++] = (stbi_uc)(a->code_buffer & 255); // suppress MSVC run-time check

+		a->code_buffer >>= 8;

+		a->num_bits -= 8;

+	}

+	STBI_ASSERT(a->num_bits == 0);

+	// now fill header the normal way

+	while (k < 4)

+		header[k++] = stbi__zget8(a);

+	len = header[1] * 256 + header[0];

+	nlen = header[3] * 256 + header[2];

+	if (nlen != (len ^ 0xffff))

+		return stbi__err("zlib corrupt", "Corrupt PNG");

+	if (len > a->zbuffer_end - a->zbuffer)

+		return stbi__err("read past buffer", "Corrupt PNG");

+	if (len > a->zout_end - a->zout)

+		if (!stbi__zexpand(a, a->zout, len))

+			return 0;

+	memcpy(a->zout, a->zbuffer, len);

+	a->zbuffer += len;

+	a->zout += len;

+	return 1;

+}

+// Reads and validates the two-byte zlib stream header.

+// Returns 1 if acceptable, otherwise the result of stbi__err(...).

+static int stbi__parse_zlib_header(stbi__zbuf* a) {

+	int cmf = stbi__zget8(a);

+	int flg = stbi__zget8(a);

+	int method = cmf & 15;

+	/* the upper four bits of cmf (cinfo) describe the window size:

+	   window = 1 << (8 + cinfo)... but who cares, we fully buffer output */

+	if ((cmf * 256 + flg) % 31 != 0) {

+		return stbi__err("bad zlib header", "Corrupt PNG"); // zlib spec

+	}

+	if (flg & 32) {

+		// preset dictionary not allowed in png

+		return stbi__err("no preset dict", "Corrupt PNG");

+	}

+	if (method != 8) {

+		// DEFLATE required for png

+		return stbi__err("bad compression", "Corrupt PNG");

+	}

+	return 1;

+}

+static const stbi_uc stbi__zdefault_length[288] = {

+	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,

+	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,

+	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,

+	9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,

+	9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,

+	9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8};

+static const stbi_uc stbi__zdefault_distance[32] = {5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5};

+/*

+Init algorithm:

+{

+   int i;   // use <= to match clearly with spec

+   for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;

+   for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9;

+   for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7;

+   for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8;

+   for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5;

+}

+*/

+// Decodes a complete DEFLATE stream (preceded by a zlib header when

+// parse_header is non-zero) block by block until the block flagged as final

+// has been processed. Returns 1 on success and 0 on any failure.

+static int stbi__parse_zlib(stbi__zbuf* a, int parse_header) {

+	int final, type;

+	if (parse_header && !stbi__parse_zlib_header(a))

+		return 0;

+	a->num_bits = 0;

+	a->code_buffer = 0;

+	do {

+		final = stbi__zreceive(a, 1);

+		type = stbi__zreceive(a, 2);

+		if (type == 3)

+			return 0; // block type 3 is not accepted

+		if (type == 0) {

+			// stored block

+			if (!stbi__parse_uncompressed_block(a))

+				return 0;

+			continue;

+		}

+		if (type == 1) {

+			// use fixed code lengths

+			if (!stbi__zbuild_huffman(&a->z_length, stbi__zdefault_length, 288) ||

+				!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32))

+				return 0;

+		} else if (!stbi__compute_huffman_codes(a)) {

+			// type 2: code lengths are read from the stream

+			return 0;

+		}

+		if (!stbi__parse_huffman_block(a))

+			return 0;

+	} while (!final);

+	return 1;

+}

+// Points a at the output buffer obuf (olen bytes; growable when exp is

+// non-zero) and runs the decoder. Returns the result of stbi__parse_zlib.

+static int stbi__do_zlib(stbi__zbuf* a, char* obuf, int olen, int exp, int parse_header) {

+	a->z_expandable = exp;

+	a->zout_start = obuf;

+	a->zout_end = obuf + olen;

+	a->zout = obuf;

+	return stbi__parse_zlib(a, parse_header);

+}

+// Inflates a zlib stream (header expected) from buffer[0..len) into a newly

+// allocated, growable buffer that starts at initial_size bytes. On success

+// returns the buffer and, if outlen is non-NULL, stores the decoded length in

+// it. Returns NULL on allocation or decode failure.

+STBIDEF char* stbi_zlib_decode_malloc_guesssize(const char* buffer, int len, int initial_size, int* outlen) {

+	stbi__zbuf a;

+	char* p = (char*)stbi__malloc(initial_size);

+	if (p == NULL) {

+		return NULL;

+	}

+	a.zbuffer = (stbi_uc*)buffer;

+	a.zbuffer_end = (stbi_uc*)buffer + len;

+	if (!stbi__do_zlib(&a, p, initial_size, 1, 1)) {

+		// the buffer may have been reallocated, so free the current start pointer

+		STBI_FREE(a.zout_start);

+		return NULL;

+	}

+	if (outlen) {

+		*outlen = (int)(a.zout - a.zout_start);

+	}

+	return a.zout_start;

+}

+// Convenience wrapper: same as stbi_zlib_decode_malloc_guesssize with an
+// initial output size of 16384 bytes.
+STBIDEF char* stbi_zlib_decode_malloc(char const* buffer, int len, int* outlen) {
+	return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
+}

+// Like stbi_zlib_decode_malloc_guesssize, but the caller chooses through
+// 'parse_header' whether the zlib header is parsed before decoding.
+// Returns the decoded buffer (byte count in *outlen when non-NULL), or NULL
+// when the allocation or the decode fails.
+STBIDEF char* stbi_zlib_decode_malloc_guesssize_headerflag(const char* buffer, int len, int initial_size, int* outlen, int parse_header) {
+	stbi__zbuf zb;
+	char* first_buf = (char*)stbi__malloc(initial_size);
+	if (!first_buf)
+		return NULL;
+	zb.zbuffer = (stbi_uc*)buffer;
+	zb.zbuffer_end = (stbi_uc*)buffer + len;
+	if (!stbi__do_zlib(&zb, first_buf, initial_size, 1, parse_header)) {
+		// release via zout_start, the buffer start as tracked by the decoder state
+		STBI_FREE(zb.zout_start);
+		return NULL;
+	}
+	if (outlen)
+		*outlen = (int)(zb.zout - zb.zout_start);
+	return zb.zout_start;
+}

+// Decode 'ilen' bytes of zlib data (header parsed) from 'ibuffer' into the
+// caller-supplied 'obuffer' of 'olen' bytes; the output is passed to the
+// decoder as non-expandable. Returns the number of bytes written, or -1 on
+// failure.
+STBIDEF int stbi_zlib_decode_buffer(char* obuffer, int olen, char const* ibuffer, int ilen) {
+	stbi__zbuf zb;
+	zb.zbuffer = (stbi_uc*)ibuffer;
+	zb.zbuffer_end = (stbi_uc*)ibuffer + ilen;
+	if (!stbi__do_zlib(&zb, obuffer, olen, 0, 1))
+		return -1;
+	return (int)(zb.zout - zb.zout_start);
+}

+// Decode 'len' bytes of compressed data from 'buffer' without parsing a
+// zlib header, into a heap buffer that starts at 16384 bytes and is passed
+// to the decoder as expandable. Returns the decoded buffer (byte count in
+// *outlen when non-NULL), or NULL when the allocation or the decode fails.
+STBIDEF char* stbi_zlib_decode_noheader_malloc(char const* buffer, int len, int* outlen) {
+	stbi__zbuf zb;
+	char* first_buf = (char*)stbi__malloc(16384);
+	if (!first_buf)
+		return NULL;
+	zb.zbuffer = (stbi_uc*)buffer;
+	zb.zbuffer_end = (stbi_uc*)buffer + len;
+	if (!stbi__do_zlib(&zb, first_buf, 16384, 1, 0)) {
+		// release via zout_start, the buffer start as tracked by the decoder state
+		STBI_FREE(zb.zout_start);
+		return NULL;
+	}
+	if (outlen)
+		*outlen = (int)(zb.zout - zb.zout_start);
+	return zb.zout_start;
+}

+// Decode 'ilen' bytes of compressed data from 'ibuffer' without parsing a
+// zlib header, into the caller-supplied 'obuffer' of 'olen' bytes (passed
+// to the decoder as non-expandable). Returns the number of bytes written,
+// or -1 on failure.
+STBIDEF int stbi_zlib_decode_noheader_buffer(char* obuffer, int olen, const char* ibuffer, int ilen) {
+	stbi__zbuf zb;
+	zb.zbuffer = (stbi_uc*)ibuffer;
+	zb.zbuffer_end = (stbi_uc*)ibuffer + ilen;
+	if (!stbi__do_zlib(&zb, obuffer, olen, 0, 0))
+		return -1;
+	return (int)(zb.zout - zb.zout_start);
+}

+#endif

+// public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18

+//    simple implementation

+//      - only 8-bit samples

+//      - no CRC checking

+//      - allocates lots of intermediate memory

+//        - avoids problem of streaming data between subsystems

+//        - avoids explicit window management

+//    performance

+//      - uses stb_zlib, a PD zlib implementation with fast huffman decoding

+#ifndef STBI_NO_PNG

+typedef struct {
+	stbi__uint32 length; // first 32-bit big-endian field of a chunk header
+	stbi__uint32 type; // second 32-bit big-endian field; compared against STBI__PNG_TYPE tags
+} stbi__pngchunk;

+// Read one chunk header from 's': two consecutive 32-bit big-endian
+// values, the length first and the type second.
+static stbi__pngchunk stbi__get_chunk_header(stbi__context* s) {
+	stbi__pngchunk hdr;
+	stbi__uint32 chunk_len = stbi__get32be(s);
+	stbi__uint32 chunk_tag = stbi__get32be(s);
+	hdr.length = chunk_len;
+	hdr.type = chunk_tag;
+	return hdr;
+}

+// Consume bytes from 's' and compare them with the 8-byte PNG signature.
+// Stops at the first mismatch and reports it through stbi__err; returns 1
+// when all eight bytes match.
+static int stbi__check_png_header(stbi__context* s) {
+	static const stbi_uc signature[8] = {0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A};
+	const stbi_uc* expect = signature;
+	const stbi_uc* stop = signature + 8;
+	while (expect != stop) {
+		if (stbi__get8(s) != *expect)
+			return stbi__err("bad png sig", "Not a PNG");
+		++expect;
+	}
+	return 1;
+}

+typedef struct {
+	stbi__context* s; // input context the PNG is read from
+	stbi_uc *idata, *expanded, *out; // idata: accumulated IDAT bytes; expanded: zlib-decoded data; out: decoded image
+	int depth; // bit depth read from IHDR (1, 2, 4, 8 or 16)
+} stbi__png;

+enum {
+	STBI__F_none = 0, // values 0..4 are the per-scanline filter bytes accepted by stbi__create_png_image_raw
+	STBI__F_sub = 1,
+	STBI__F_up = 2,
+	STBI__F_avg = 3,
+	STBI__F_paeth = 4,
+	// synthetic filters used for first scanline to avoid needing a dummy row of
+	// 0s
+	STBI__F_avg_first, // implicit value 5
+	STBI__F_paeth_first // implicit value 6
+};

+static stbi_uc first_row_filter[5] = {STBI__F_none, STBI__F_sub, STBI__F_none, STBI__F_avg_first, STBI__F_paeth_first}; // indexed by the scanline's filter byte (0..4); gives the filter used in its place on row 0

+// Paeth predictor: form the estimate a + b - c and return whichever of
+// a, b, c lies closest to it. Ties are resolved in the order a, b, c.
+static int stbi__paeth(int a, int b, int c) {
+	int estimate = a + b - c;
+	int dist_a = abs(estimate - a);
+	int dist_b = abs(estimate - b);
+	int dist_c = abs(estimate - c);
+	if (dist_a > dist_b || dist_a > dist_c)
+		return (dist_b <= dist_c) ? b : c;
+	return a;
+}

+static const stbi_uc stbi__depth_scale_table[9] = {0, 0xff, 0x55, 0, 0x11, 0, 0, 0, 0x01}; // indexed by bit depth; multiplier that scales a sample of that depth to the 0..255 range (nonzero only for 1, 2, 4, 8)

+// create the png data from post-deflated data: unfilter the scanlines in 'raw' into a newly allocated a->out holding out_n components per pixel; returns 1 on success, 0 on failure
+static int stbi__create_png_image_raw(stbi__png* a, stbi_uc* raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color) {
+	int bytes = (depth == 16 ? 2 : 1); // bytes per sample
+	stbi__context* s = a->s;
+	stbi__uint32 i, j, stride = x * out_n * bytes; // stride: bytes per output row
+	stbi__uint32 img_len, img_width_bytes;
+	int k;
+	int img_n = s->img_n; // copy it into a local for later
+	int output_bytes = out_n * bytes; // bytes per output pixel
+	int filter_bytes = img_n * bytes; // bytes per source pixel; forced to 1 below when depth < 8
+	int width = x;
+	STBI_ASSERT(out_n == s->img_n || out_n == s->img_n + 1);
+	a->out = (stbi_uc*)stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
+	if (!a->out)
+		return stbi__err("outofmem", "Out of memory");
+	if (!stbi__mad3sizes_valid(img_n, x, depth, 7))
+		return stbi__err("too large", "Corrupt PNG");
+	img_width_bytes = (((img_n * x * depth) + 7) >> 3); // packed bytes per source scanline, not counting the filter byte
+	img_len = (img_width_bytes + 1) * y; // +1 per row for the filter byte
+	// we used to check for exact match between raw_len and img_len on
+	// non-interlaced PNGs, but issue #276 reported a PNG in the wild that had
+	// extra data at the end (all zeros), so just check for raw_len < img_len
+	// always.
+	if (raw_len < img_len)
+		return stbi__err("not enough pixels", "Corrupt PNG");
+	for (j = 0; j < y; ++j) {
+		stbi_uc* cur = a->out + stride * j;
+		stbi_uc* prior;
+		int filter = *raw++; // every scanline begins with its filter byte
+		if (filter > 4)
+			return stbi__err("invalid filter", "Corrupt PNG");
+		if (depth < 8) {
+			STBI_ASSERT(img_width_bytes <= x);
+			cur += x * out_n - img_width_bytes; // store output to the rightmost img_len
+												// bytes, so we can decode in place
+			filter_bytes = 1;
+			width = img_width_bytes;
+		}
+		prior = cur - stride; // bugfix: need to compute this after 'cur +='
+							  // computation above
+		// if first row, use special filter that doesn't sample previous row
+		if (j == 0)
+			filter = first_row_filter[filter];
+		// handle first byte explicitly
+		for (k = 0; k < filter_bytes; ++k) {
+			switch (filter) {
+			case STBI__F_none:
+				cur[k] = raw[k];
+				break;
+			case STBI__F_sub:
+				cur[k] = raw[k];
+				break;
+			case STBI__F_up:
+				cur[k] = STBI__BYTECAST(raw[k] + prior[k]);
+				break;
+			case STBI__F_avg:
+				cur[k] = STBI__BYTECAST(raw[k] + (prior[k] >> 1));
+				break;
+			case STBI__F_paeth:
+				cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0, prior[k], 0));
+				break;
+			case STBI__F_avg_first:
+				cur[k] = raw[k];
+				break;
+			case STBI__F_paeth_first:
+				cur[k] = raw[k];
+				break;
+			}
+		}
+		if (depth == 8) {
+			if (img_n != out_n)
+				cur[img_n] = 255; // first pixel
+			raw += img_n;
+			cur += out_n;
+			prior += out_n;
+		} else if (depth == 16) {
+			if (img_n != out_n) {
+				cur[filter_bytes] = 255;	 // first pixel top byte
+				cur[filter_bytes + 1] = 255; // first pixel bottom byte
+			}
+			raw += filter_bytes;
+			cur += output_bytes;
+			prior += output_bytes;
+		} else {
+			raw += 1;
+			cur += 1;
+			prior += 1;
+		}
+		// this is a little gross, so that we don't switch per-pixel or
+		// per-component
+		if (depth < 8 || img_n == out_n) {
+			int nk = (width - 1) * filter_bytes; // bytes left in the row after the first pixel
+#define STBI__CASE(f)                                                                                                                                          \
+	case f:                                                                                                                                                    \
+		for (k = 0; k < nk; ++k)
+			switch (filter) { // ordinary statement: the STBI__CASE macro above ends on its 'for' line
+			// "none" filter turns into a memcpy here; make that explicit.
+			case STBI__F_none:
+				memcpy(cur, raw, nk);
+				break;
+				STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k - filter_bytes]); }
+				break;
+				STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); }
+				break;
+				STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k - filter_bytes]) >> 1)); }
+				break;
+				STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - filter_bytes], prior[k], prior[k - filter_bytes])); }
+				break;
+				STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k - filter_bytes] >> 1)); }
+				break;
+				STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - filter_bytes], 0, 0)); }
+				break;
+			}
+#undef STBI__CASE
+			raw += nk;
+		} else {
+			STBI_ASSERT(img_n + 1 == out_n);
+#define STBI__CASE(f)                                                                                                                                          \
+	case f:                                                                                                                                                    \
+		for (i = x - 1; i >= 1; --i, cur[filter_bytes] = 255, raw += filter_bytes, cur += output_bytes, prior += output_bytes)                                 \
+			for (k = 0; k < filter_bytes; ++k)
+			switch (filter) { // ordinary statement: the STBI__CASE macro above ends on its inner 'for' line
+				STBI__CASE(STBI__F_none) { cur[k] = raw[k]; }
+				break;
+				STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k - output_bytes]); }
+				break;
+				STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); }
+				break;
+				STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k - output_bytes]) >> 1)); }
+				break;
+				STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - output_bytes], prior[k], prior[k - output_bytes])); }
+				break;
+				STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k - output_bytes] >> 1)); }
+				break;
+				STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - output_bytes], 0, 0)); }
+				break;
+			}
+#undef STBI__CASE
+			// the loop above sets the high byte of the pixels' alpha, but for
+			// 16 bit png files we also need the low byte set. we'll do that
+			// here.
+			if (depth == 16) {
+				cur = a->out + stride * j; // start at the beginning of the row again
+				for (i = 0; i < x; ++i, cur += output_bytes) {
+					cur[filter_bytes + 1] = 255;
+				}
+			}
+		}
+	}
+	// we make a separate pass to expand bits to pixels; for performance,
+	// this could run two scanlines behind the above code, so it won't
+	// interfere with filtering but will still be in the cache.
+	if (depth < 8) {
+		for (j = 0; j < y; ++j) {
+			stbi_uc* cur = a->out + stride * j;
+			stbi_uc* in = a->out + stride * j + x * out_n - img_width_bytes; // packed samples were stored right-aligned in the row by the loop above
+			// unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the
+			// common 8-bit path optimal at minimal cost for 1/2/4-bit png
+			// guarantee byte alignment, if width is not multiple of 8/4/2 we'll
+			// decode dummy trailing data that will be skipped in the later loop
+			stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
+			// note that the final byte might overshoot and write more data than
+			// desired. we can allocate enough data that this never writes out
+			// of memory, but it could also overwrite the next scanline. can it
+			// overwrite non-empty data on the next scanline? yes, consider
+			// 1-pixel-wide scanlines with 1-bit-per-pixel. so we need to
+			// explicitly clamp the final ones
+			if (depth == 4) {
+				for (k = x * img_n; k >= 2; k -= 2, ++in) {
+					*cur++ = scale * ((*in >> 4));
+					*cur++ = scale * ((*in) & 0x0f);
+				}
+				if (k > 0)
+					*cur++ = scale * ((*in >> 4));
+			} else if (depth == 2) {
+				for (k = x * img_n; k >= 4; k -= 4, ++in) {
+					*cur++ = scale * ((*in >> 6));
+					*cur++ = scale * ((*in >> 4) & 0x03);
+					*cur++ = scale * ((*in >> 2) & 0x03);
+					*cur++ = scale * ((*in) & 0x03);
+				}
+				if (k > 0)
+					*cur++ = scale * ((*in >> 6));
+				if (k > 1)
+					*cur++ = scale * ((*in >> 4) & 0x03);
+				if (k > 2)
+					*cur++ = scale * ((*in >> 2) & 0x03);
+			} else if (depth == 1) {
+				for (k = x * img_n; k >= 8; k -= 8, ++in) {
+					*cur++ = scale * ((*in >> 7));
+					*cur++ = scale * ((*in >> 6) & 0x01);
+					*cur++ = scale * ((*in >> 5) & 0x01);
+					*cur++ = scale * ((*in >> 4) & 0x01);
+					*cur++ = scale * ((*in >> 3) & 0x01);
+					*cur++ = scale * ((*in >> 2) & 0x01);
+					*cur++ = scale * ((*in >> 1) & 0x01);
+					*cur++ = scale * ((*in) & 0x01);
+				}
+				if (k > 0)
+					*cur++ = scale * ((*in >> 7));
+				if (k > 1)
+					*cur++ = scale * ((*in >> 6) & 0x01);
+				if (k > 2)
+					*cur++ = scale * ((*in >> 5) & 0x01);
+				if (k > 3)
+					*cur++ = scale * ((*in >> 4) & 0x01);
+				if (k > 4)
+					*cur++ = scale * ((*in >> 3) & 0x01);
+				if (k > 5)
+					*cur++ = scale * ((*in >> 2) & 0x01);
+				if (k > 6)
+					*cur++ = scale * ((*in >> 1) & 0x01);
+			}
+			if (img_n != out_n) {
+				int q;
+				// insert alpha = 255
+				cur = a->out + stride * j;
+				if (img_n == 1) {
+					for (q = x - 1; q >= 0; --q) { // walk backwards so unread source samples are not overwritten
+						cur[q * 2 + 1] = 255;
+						cur[q * 2 + 0] = cur[q];
+					}
+				} else {
+					STBI_ASSERT(img_n == 3);
+					for (q = x - 1; q >= 0; --q) { // walk backwards so unread source samples are not overwritten
+						cur[q * 4 + 3] = 255;
+						cur[q * 4 + 2] = cur[q * 3 + 2];
+						cur[q * 4 + 1] = cur[q * 3 + 1];
+						cur[q * 4 + 0] = cur[q * 3 + 0];
+					}
+				}
+			}
+		}
+	} else if (depth == 16) {
+		// force the image data from big-endian to platform-native.
+		// this is done in a separate pass due to the decoding relying
+		// on the data being untouched, but could probably be done
+		// per-line during decode if care is taken.
+		stbi_uc* cur = a->out;
+		stbi__uint16* cur16 = (stbi__uint16*)cur;
+		for (i = 0; i < x * y * out_n; ++i, cur16++, cur += 2) {
+			*cur16 = (cur[0] << 8) | cur[1]; // first byte is the high half, second byte the low half
+		}
+	}
+	return 1;
+}

+// Build a->out from the decompressed PNG data in 'image_data'.
+// Non-interlaced images are handed directly to stbi__create_png_image_raw.
+// Interlaced images are decoded in 7 passes; each pass is decoded into a
+// temporary a->out and its pixels are copied to their final positions in a
+// full-size buffer, which becomes a->out at the end.
+// Returns 1 on success and 0 on failure (including out-of-memory).
+static int stbi__create_png_image(stbi__png* a, stbi_uc* image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced) {
+	int bytes = (depth == 16 ? 2 : 1);
+	int out_bytes = out_n * bytes; // bytes per output pixel
+	stbi_uc* final;
+	int p;
+	if (!interlaced)
+		return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
+	// de-interlacing
+	final = (stbi_uc*)stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
+	// every pass below copies pixels into 'final', so a failed allocation
+	// has to be reported before the first pass runs
+	if (!final)
+		return stbi__err("outofmem", "Out of memory");
+	for (p = 0; p < 7; ++p) {
+		// per-pass origin and spacing of the pixels within the full image
+		int xorig[] = {0, 4, 0, 2, 0, 1, 0};
+		int yorig[] = {0, 0, 4, 0, 2, 0, 1};
+		int xspc[] = {8, 8, 4, 4, 2, 2, 1};
+		int yspc[] = {8, 8, 8, 4, 4, 2, 2};
+		int i, j, x, y;
+		// size of this pass's sub-image (rounded up); it may be empty
+		x = (a->s->img_x - xorig[p] + xspc[p] - 1) / xspc[p];
+		y = (a->s->img_y - yorig[p] + yspc[p] - 1) / yspc[p];
+		if (x && y) {
+			// bytes this pass occupies in image_data: packed row plus one filter byte, times rows
+			stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
+			if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
+				STBI_FREE(final);
+				return 0;
+			}
+			for (j = 0; j < y; ++j) {
+				for (i = 0; i < x; ++i) {
+					int out_y = j * yspc[p] + yorig[p];
+					int out_x = i * xspc[p] + xorig[p];
+					memcpy(final + out_y * a->s->img_x * out_bytes + out_x * out_bytes, a->out + (j * x + i) * out_bytes, out_bytes);
+				}
+			}
+			STBI_FREE(a->out);
+			image_data += img_len;
+			image_data_len -= img_len;
+		}
+	}
+	a->out = final;
+	return 1;
+}

+// Apply color-key transparency to the 8-bit image in z->out, which is
+// expected to already carry an alpha channel (out_n is 2 or 4).
+// With 2 channels the alpha of every pixel is rewritten: 0 when the gray
+// value equals tc[0], 255 otherwise. With 4 channels only pixels whose
+// first three components equal tc[0..2] get alpha 0. Always returns 1.
+static int stbi__compute_transparency(stbi__png* z, stbi_uc tc[3], int out_n) {
+	stbi__context* ctx = z->s;
+	stbi__uint32 total = ctx->img_x * ctx->img_y;
+	stbi__uint32 idx;
+	stbi_uc* px = z->out;
+	STBI_ASSERT(out_n == 2 || out_n == 4);
+	if (out_n == 2) {
+		for (idx = 0; idx < total; ++idx, px += 2)
+			px[1] = (px[0] != tc[0]) ? 255 : 0;
+		return 1;
+	}
+	for (idx = 0; idx < total; ++idx, px += 4) {
+		if (px[0] != tc[0] || px[1] != tc[1] || px[2] != tc[2])
+			continue;
+		px[3] = 0;
+	}
+	return 1;
+}

+// 16-bit counterpart of stbi__compute_transparency: z->out is treated as an
+// array of 16-bit samples that already carries an alpha channel (out_n is
+// 2 or 4). With 2 channels every alpha becomes 0 when the gray value equals
+// tc[0] and 65535 otherwise; with 4 channels only pixels whose first three
+// components equal tc[0..2] get alpha 0. Always returns 1.
+static int stbi__compute_transparency16(stbi__png* z, stbi__uint16 tc[3], int out_n) {
+	stbi__context* ctx = z->s;
+	stbi__uint32 total = ctx->img_x * ctx->img_y;
+	stbi__uint32 idx;
+	stbi__uint16* px = (stbi__uint16*)z->out;
+	STBI_ASSERT(out_n == 2 || out_n == 4);
+	if (out_n == 2) {
+		for (idx = 0; idx < total; ++idx, px += 2)
+			px[1] = (px[0] != tc[0]) ? 65535 : 0;
+		return 1;
+	}
+	for (idx = 0; idx < total; ++idx, px += 4) {
+		if (px[0] != tc[0] || px[1] != tc[1] || px[2] != tc[2])
+			continue;
+		px[3] = 0;
+	}
+	return 1;
+}

+// Replace the one-byte-per-pixel index image in a->out with a new buffer
+// holding 'pal_img_n' bytes per pixel taken from 'palette', which is laid
+// out as 4 bytes per entry. When pal_img_n is 3 the fourth palette byte is
+// dropped; otherwise all four bytes are copied. 'len' is unused.
+// Returns 1 on success, or the stbi__err result when allocation fails (in
+// which case a->out is left untouched).
+static int stbi__expand_png_palette(stbi__png* a, stbi_uc* palette, int len, int pal_img_n) {
+	stbi__uint32 idx, total = a->s->img_x * a->s->img_y;
+	stbi_uc* indices = a->out;
+	stbi_uc* expanded = (stbi_uc*)stbi__malloc_mad2(total, pal_img_n, 0);
+	stbi_uc* dst = expanded;
+	STBI_NOTUSED(len);
+	if (expanded == NULL)
+		return stbi__err("outofmem", "Out of memory");
+	// from here until the old buffer is freed, an early exit would leak 'expanded'
+	if (pal_img_n == 3) {
+		for (idx = 0; idx < total; ++idx, dst += 3) {
+			const stbi_uc* entry = palette + indices[idx] * 4;
+			dst[0] = entry[0];
+			dst[1] = entry[1];
+			dst[2] = entry[2];
+		}
+	} else {
+		for (idx = 0; idx < total; ++idx, dst += 4) {
+			const stbi_uc* entry = palette + indices[idx] * 4;
+			dst[0] = entry[0];
+			dst[1] = entry[1];
+			dst[2] = entry[2];
+			dst[3] = entry[3];
+		}
+	}
+	STBI_FREE(a->out);
+	a->out = expanded;
+	return 1;
+}

+static int stbi__unpremultiply_on_load = 0; // written by stbi_set_unpremultiply_on_load; read by stbi__de_iphone
+static int stbi__de_iphone_flag = 0; // written by stbi_convert_iphone_png_to_rgb; gates the stbi__de_iphone call in stbi__parse_png_file

+// Store the caller's choice in the file-level stbi__unpremultiply_on_load
+// flag, which stbi__de_iphone consults for 4-channel images.
+STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply) {
+	stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
+}

+// Store the caller's choice in the file-level stbi__de_iphone_flag, which
+// stbi__parse_png_file checks before calling stbi__de_iphone.
+STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert) {
+	stbi__de_iphone_flag = flag_true_if_should_convert;
+}

+// Swap the first and third component of every pixel in z->out (bgr -> rgb).
+// For 4-component images, when stbi__unpremultiply_on_load is set, pixels
+// with nonzero alpha additionally have their color components divided by
+// alpha (scaled by 255, rounded via alpha/2); pixels with zero alpha are
+// only swapped.
+static void stbi__de_iphone(stbi__png* z) {
+	stbi__context* ctx = z->s;
+	stbi__uint32 remaining = ctx->img_x * ctx->img_y;
+	stbi_uc* px = z->out;
+	int step = 4;
+	int unpremultiply = 0;
+	if (ctx->img_out_n == 3) {
+		step = 3;
+	} else {
+		STBI_ASSERT(ctx->img_out_n == 4);
+		unpremultiply = stbi__unpremultiply_on_load;
+	}
+	for (; remaining > 0; --remaining, px += step) {
+		stbi_uc first = px[0];
+		if (unpremultiply && px[3]) {
+			// convert bgr to rgb and unpremultiply
+			stbi_uc alpha = px[3];
+			stbi_uc half = alpha / 2;
+			px[0] = (px[2] * 255 + half) / alpha;
+			px[1] = (px[1] * 255 + half) / alpha;
+			px[2] = (first * 255 + half) / alpha;
+		} else {
+			// convert bgr to rgb
+			px[0] = px[2];
+			px[2] = first;
+		}
+	}
+}

+#define STBI__PNG_TYPE(a, b, c, d) (((unsigned)(a) << 24) + ((unsigned)(b) << 16) + ((unsigned)(c) << 8) + (unsigned)(d)) // pack four chunk-name characters into one 32-bit tag, first character in the top byte

+static int stbi__parse_png_file(stbi__png* z, int scan, int req_comp) { // walk the PNG chunks of z->s; 'scan' (STBI__SCAN_type / STBI__SCAN_header / STBI__SCAN_load) selects how far to go; returns 1 on success, 0 on failure
+	stbi_uc palette[1024], pal_img_n = 0; // palette: up to 256 entries of 4 bytes each; pal_img_n: 0 when not paletted, else 3 or 4
+	stbi_uc has_trans = 0, tc[3] = {0}; // tc: transparent color from tRNS for non-paletted images up to 8 bits
+	stbi__uint16 tc16[3]; // transparent color from tRNS for 16-bit images
+	stbi__uint32 ioff = 0, idata_limit = 0, i, pal_len = 0; // ioff: bytes stored in z->idata so far; idata_limit: its current capacity
+	int first = 1, k, interlace = 0, color = 0, is_iphone = 0; // first: no IHDR seen yet
+	stbi__context* s = z->s;
+	z->expanded = NULL;
+	z->idata = NULL;
+	z->out = NULL;
+	if (!stbi__check_png_header(s))
+		return 0;
+	if (scan == STBI__SCAN_type)
+		return 1;
+	for (;;) {
+		stbi__pngchunk c = stbi__get_chunk_header(s);
+		switch (c.type) {
+		case STBI__PNG_TYPE('C', 'g', 'B', 'I'):
+			is_iphone = 1; // later disables zlib header parsing and enables the stbi__de_iphone step
+			stbi__skip(s, c.length);
+			break;
+		case STBI__PNG_TYPE('I', 'H', 'D', 'R'): {
+			int comp, filter;
+			if (!first)
+				return stbi__err("multiple IHDR", "Corrupt PNG");
+			first = 0;
+			if (c.length != 13)
+				return stbi__err("bad IHDR len", "Corrupt PNG");
+			s->img_x = stbi__get32be(s);
+			if (s->img_x > (1 << 24))
+				return stbi__err("too large", "Very large image (corrupt?)");
+			s->img_y = stbi__get32be(s);
+			if (s->img_y > (1 << 24))
+				return stbi__err("too large", "Very large image (corrupt?)");
+			z->depth = stbi__get8(s);
+			if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)
+				return stbi__err("1/2/4/8/16-bit only", "PNG not supported: 1/2/4/8/16-bit only");
+			color = stbi__get8(s);
+			if (color > 6)
+				return stbi__err("bad ctype", "Corrupt PNG");
+			if (color == 3 && z->depth == 16)
+				return stbi__err("bad ctype", "Corrupt PNG");
+			if (color == 3)
+				pal_img_n = 3; // paletted image; becomes 4 if a tRNS chunk follows
+			else if (color & 1)
+				return stbi__err("bad ctype", "Corrupt PNG");
+			comp = stbi__get8(s);
+			if (comp)
+				return stbi__err("bad comp method", "Corrupt PNG");
+			filter = stbi__get8(s);
+			if (filter)
+				return stbi__err("bad filter method", "Corrupt PNG");
+			interlace = stbi__get8(s);
+			if (interlace > 1)
+				return stbi__err("bad interlace method", "Corrupt PNG");
+			if (!s->img_x || !s->img_y)
+				return stbi__err("0-pixel image", "Corrupt PNG");
+			if (!pal_img_n) {
+				s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); // color bit 2 selects 3 vs 1 components; bit 4 adds one more
+				if ((1 << 30) / s->img_x / s->img_n < s->img_y)
+					return stbi__err("too large", "Image too large to decode");
+				if (scan == STBI__SCAN_header)
+					return 1;
+			} else {
+				// if paletted, then pal_n is our final components, and
+				// img_n is # components to decompress/filter.
+				s->img_n = 1;
+				if ((1 << 30) / s->img_x / 4 < s->img_y)
+					return stbi__err("too large", "Corrupt PNG");
+				// if SCAN_header, have to scan to see if we have a tRNS
+			}
+			break;
+		}
+		case STBI__PNG_TYPE('P', 'L', 'T', 'E'): {
+			if (first)
+				return stbi__err("first not IHDR", "Corrupt PNG");
+			if (c.length > 256 * 3)
+				return stbi__err("invalid PLTE", "Corrupt PNG");
+			pal_len = c.length / 3;
+			if (pal_len * 3 != c.length)
+				return stbi__err("invalid PLTE", "Corrupt PNG");
+			for (i = 0; i < pal_len; ++i) { // 3 bytes per entry in the file, stored as 4 with the fourth set to 255
+				palette[i * 4 + 0] = stbi__get8(s);
+				palette[i * 4 + 1] = stbi__get8(s);
+				palette[i * 4 + 2] = stbi__get8(s);
+				palette[i * 4 + 3] = 255;
+			}
+			break;
+		}
+		case STBI__PNG_TYPE('t', 'R', 'N', 'S'): {
+			if (first)
+				return stbi__err("first not IHDR", "Corrupt PNG");
+			if (z->idata)
+				return stbi__err("tRNS after IDAT", "Corrupt PNG");
+			if (pal_img_n) {
+				if (scan == STBI__SCAN_header) {
+					s->img_n = 4;
+					return 1;
+				}
+				if (pal_len == 0)
+					return stbi__err("tRNS before PLTE", "Corrupt PNG");
+				if (c.length > pal_len)
+					return stbi__err("bad tRNS len", "Corrupt PNG");
+				pal_img_n = 4;
+				for (i = 0; i < c.length; ++i) // one byte per palette entry, written to that entry's fourth byte
+					palette[i * 4 + 3] = stbi__get8(s);
+			} else {
+				if (!(s->img_n & 1))
+					return stbi__err("tRNS with alpha", "Corrupt PNG");
+				if (c.length != (stbi__uint32)s->img_n * 2)
+					return stbi__err("bad tRNS len", "Corrupt PNG");
+				has_trans = 1;
+				if (z->depth == 16) {
+					for (k = 0; k < s->img_n; ++k)
+						tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
+				} else {
+					for (k = 0; k < s->img_n; ++k)
+						tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit
+																									   // images will be
+																									   // larger
+				}
+			}
+			break;
+		}
+		case STBI__PNG_TYPE('I', 'D', 'A', 'T'): {
+			if (first)
+				return stbi__err("first not IHDR", "Corrupt PNG");
+			if (pal_img_n && !pal_len)
+				return stbi__err("no PLTE", "Corrupt PNG");
+			if (scan == STBI__SCAN_header) {
+				s->img_n = pal_img_n;
+				return 1;
+			}
+			if ((int)(ioff + c.length) < (int)ioff) // reject a total size that wraps when viewed as int
+				return 0;
+			if (ioff + c.length > idata_limit) {
+				stbi__uint32 idata_limit_old = idata_limit;
+				stbi_uc* p;
+				if (idata_limit == 0)
+					idata_limit = c.length > 4096 ? c.length : 4096;
+				while (ioff + c.length > idata_limit) // grow capacity by doubling until the new chunk fits
+					idata_limit *= 2;
+				STBI_NOTUSED(idata_limit_old);
+				p = (stbi_uc*)STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit);
+				if (p == NULL)
+					return stbi__err("outofmem", "Out of memory");
+				z->idata = p;
+			}
+			if (!stbi__getn(s, z->idata + ioff, c.length))
+				return stbi__err("outofdata", "Corrupt PNG");
+			ioff += c.length;
+			break;
+		}
+		case STBI__PNG_TYPE('I', 'E', 'N', 'D'): {
+			stbi__uint32 raw_len, bpl;
+			if (first)
+				return stbi__err("first not IHDR", "Corrupt PNG");
+			if (scan != STBI__SCAN_load)
+				return 1;
+			if (z->idata == NULL)
+				return stbi__err("no IDAT", "Corrupt PNG");
+			// initial guess for decoded data size to avoid unnecessary reallocs
+			bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
+			raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
+			z->expanded = (stbi_uc*)stbi_zlib_decode_malloc_guesssize_headerflag((char*)z->idata, ioff, raw_len, (int*)&raw_len, !is_iphone); // raw_len is overwritten with the decoded size
+			if (z->expanded == NULL)
+				return 0; // zlib should set error
+			STBI_FREE(z->idata);
+			z->idata = NULL;
+			if ((req_comp == s->img_n + 1 && req_comp != 3 && !pal_img_n) || has_trans)
+				s->img_out_n = s->img_n + 1; // decode with one extra (alpha) component
+			else
+				s->img_out_n = s->img_n;
+			if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace))
+				return 0;
+			if (has_trans) {
+				if (z->depth == 16) {
+					if (!stbi__compute_transparency16(z, tc16, s->img_out_n))
+						return 0;
+				} else {
+					if (!stbi__compute_transparency(z, tc, s->img_out_n))
+						return 0;
+				}
+			}
+			if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
+				stbi__de_iphone(z);
+			if (pal_img_n) {
+				// pal_img_n == 3 or 4
+				s->img_n = pal_img_n; // record the actual colors we had
+				s->img_out_n = pal_img_n;
+				if (req_comp >= 3)
+					s->img_out_n = req_comp;
+				if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
+					return 0;
+			} else if (has_trans) {
+				// non-paletted image with tRNS -> source image has (constant)
+				// alpha
+				++s->img_n;
+			}
+			STBI_FREE(z->expanded);
+			z->expanded = NULL;
+			// end of PNG chunk, read and skip CRC
+			stbi__get32be(s);
+			return 1;
+		}
+		default:
+			// if critical, fail
+			if (first)
+				return stbi__err("first not IHDR", "Corrupt PNG");
+			if ((c.type & (1 << 29)) == 0) { // bit 29 of the tag clear: treated as a critical chunk
+#ifndef STBI_NO_FAILURE_STRINGS
+				// not threadsafe
+				static char invalid_chunk[] = "XXXX PNG chunk not known";
+				invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
+				invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
+				invalid_chunk[2] = STBI__BYTECAST(c.type >> 8);
+				invalid_chunk[3] = STBI__BYTECAST(c.type >> 0);
+#endif
+				return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
+			}
+			stbi__skip(s, c.length);
+			break;
+		}
+		// end of PNG chunk, read and skip CRC
+		stbi__get32be(s);
+	}
+}

+// Fully decode the PNG behind 'p' and return the pixel buffer.
+// 'req_comp' must be in 0..4; when it is nonzero and differs from the
+// decoded component count, the pixels are converted to 'req_comp'
+// components. On success *x and *y receive the image size, *n (when
+// non-NULL) the component count recorded in img_n, and
+// ri->bits_per_channel is 8 for depths below 8 and the PNG depth
+// otherwise. Returns NULL on failure. Any buffers still attached to 'p' are
+// released before returning, except on the conversion-failure path, which
+// returns immediately.
+static void* stbi__do_png(stbi__png* p, int* x, int* y, int* n, int req_comp, stbi__result_info* ri) {
+	void* pixels = NULL;
+	if (req_comp < 0 || req_comp > 4)
+		return stbi__errpuc("bad req_comp", "Internal error");
+	if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
+		stbi__context* ctx = p->s;
+		ri->bits_per_channel = (p->depth < 8) ? 8 : p->depth;
+		// take ownership of the decoded image so the cleanup below leaves it alone
+		pixels = p->out;
+		p->out = NULL;
+		if (req_comp != 0 && req_comp != ctx->img_out_n) {
+			if (ri->bits_per_channel == 8)
+				pixels = stbi__convert_format((unsigned char*)pixels, ctx->img_out_n, req_comp, ctx->img_x, ctx->img_y);
+			else
+				pixels = stbi__convert_format16((stbi__uint16*)pixels, ctx->img_out_n, req_comp, ctx->img_x, ctx->img_y);
+			ctx->img_out_n = req_comp;
+			if (pixels == NULL)
+				return pixels;
+		}
+		*x = ctx->img_x;
+		*y = ctx->img_y;
+		if (n)
+			*n = ctx->img_n;
+	}
+	STBI_FREE(p->out);
+	p->out = NULL;
+	STBI_FREE(p->expanded);
+	p->expanded = NULL;
+	STBI_FREE(p->idata);
+	p->idata = NULL;
+	return pixels;
+}

+// Entry point for PNG loading: wrap the context 's' in a stbi__png on the
+// stack and forward everything to stbi__do_png.
+static void* stbi__png_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri) {
+	stbi__png decoder;
+	decoder.s = s;
+	return stbi__do_png(&decoder, x, y, comp, req_comp, ri);
+}

+// Report whether 's' starts with the PNG signature. The context is rewound
+// afterwards regardless of the outcome.
+static int stbi__png_test(stbi__context* s) {
+	int matched = stbi__check_png_header(s);
+	stbi__rewind(s);
+	return matched;
+}

+// Run a header-only scan of the PNG behind 'p'. On success stores the
+// width, height and component count through whichever of x, y, comp are
+// non-NULL and returns 1. On failure rewinds the context and returns 0.
+static int stbi__png_info_raw(stbi__png* p, int* x, int* y, int* comp) {
+	if (stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
+		if (x)
+			*x = p->s->img_x;
+		if (y)
+			*y = p->s->img_y;
+		if (comp)
+			*comp = p->s->img_n;
+		return 1;
+	}
+	stbi__rewind(p->s);
+	return 0;
+}

+// Header-only query: wrap 's' in a stack stbi__png and delegate to
+// stbi__png_info_raw.
+static int stbi__png_info(stbi__context* s, int* x, int* y, int* comp) {
+	stbi__png probe;
+	probe.s = s;
+	return stbi__png_info_raw(&probe, x, y, comp);
+}

+// Return 1 when the PNG header behind 's' parses and declares a bit depth
+// of 16, otherwise 0. The context is rewound whenever 0 is returned (by
+// stbi__png_info_raw on a parse failure, here on a depth mismatch).
+static int stbi__png_is16(stbi__context* s) {
+	stbi__png probe;
+	probe.s = s;
+	if (stbi__png_info_raw(&probe, NULL, NULL, NULL)) {
+		if (probe.depth == 16)
+			return 1;
+		stbi__rewind(probe.s);
+	}
+	return 0;
+}

+#endif

+// Microsoft/Windows BMP image

+#ifndef STBI_NO_BMP

+// Check whether 's' looks like a BMP: the first two bytes must be 'B' and
+// 'M', and after four skipped fields the header size must be one of
+// 12, 40, 56, 108 or 124. Does not rewind the context.
+static int stbi__bmp_test_raw(stbi__context* s) {
+	int header_size;
+	if (stbi__get8(s) != 'B')
+		return 0;
+	if (stbi__get8(s) != 'M')
+		return 0;
+	stbi__get32le(s); // discard filesize
+	stbi__get16le(s); // discard reserved
+	stbi__get16le(s); // discard reserved
+	stbi__get32le(s); // discard data offset
+	header_size = stbi__get32le(s);
+	switch (header_size) {
+	case 12:
+	case 40:
+	case 56:
+	case 108:
+	case 124:
+		return 1;
+	default:
+		return 0;
+	}
+}

+// BMP detection that leaves the context rewound: run stbi__bmp_test_raw,
+// rewind, and return its verdict.
+static int stbi__bmp_test(stbi__context* s) {
+	int looks_like_bmp = stbi__bmp_test_raw(s);
+	stbi__rewind(s);
+	return looks_like_bmp;
+}

+// returns 0..31 for the highest set bit, or -1 when z is 0
+static int stbi__high_bit(unsigned int z) {
+	int pos = 0;
+	int step;
+	if (z == 0)
+		return -1;
+	// binary search: test the upper part at widths 16, 8, 4, 2, 1
+	for (step = 16; step > 0; step >>= 1) {
+		if ((z >> step) != 0) {
+			pos += step;
+			z >>= step;
+		}
+	}
+	return pos;
+}

+// Count the set bits in the low 32 bits of 'a'.
+static int stbi__bitcount(unsigned int a) {
+	int count = 0;
+	a &= 0xffffffffu; // only the low 32 bits take part
+	while (a != 0) {
+		a &= a - 1; // clear the lowest set bit
+		++count;
+	}
+	return count;
+}

+// extract an arbitrarily-aligned N-bit value (N=bits)
+// from v, and then make it 8-bits long and fractionally
+// extend it to the full range.
+// 'shift' moves the field so it is top-aligned in 8 bits: positive values
+// shift right, negative values shift left.
+static int stbi__shiftsigned(unsigned int v, int shift, int bits) {
+	// per-width multiplier that replicates the field's bit pattern
+	static unsigned int replicate[9] = {0, 0xff, 0x55, 0x49, 0x11, 0x21, 0x41, 0x81, 0x01};
+	// per-width right shift applied after the multiplication
+	static unsigned int trim[9] = {0, 0, 0, 1, 0, 2, 4, 6, 0};
+	v = (shift < 0) ? (v << -shift) : (v >> shift);
+	STBI_ASSERT(v < 256);
+	v >>= (8 - bits);
+	STBI_ASSERT(bits >= 0 && bits <= 8);
+	return (int)(v * replicate[bits]) >> trim[bits];
+}

+typedef struct {
+	int bpp, offset, hsz; // bpp: 16-bit field read after the plane count; offset and hsz: 32-bit fields read from the file header by stbi__bmp_parse_header
+	unsigned int mr, mg, mb, ma, all_a; // mr/mg/mb/ma: per-channel bit masks; NOTE(review): all_a usage is only partly visible here - confirm against the loader
+	int extra_read; // starts at 14 in stbi__bmp_parse_header and grows by 12 when three masks are read from the file
+} stbi__bmp_data;

+// Parse the BMP file header and the info header that follows it.
+// Fills `info` (bpp, offset, hsz, channel masks, extra_read) and stores the
+// image dimensions in s->img_x / s->img_y.
+// Returns a non-NULL dummy pointer ((void*)1) on success, or the value of
+// stbi__errpuc() on failure. Only info header sizes 12, 40, 56, 108 and 124
+// are accepted, and compression values 1 and 2 (RLE) are rejected.
+// info->all_a is only written here for uncompressed 32bpp images; callers
+// must preset it.
+static void* stbi__bmp_parse_header(stbi__context* s, stbi__bmp_data* info) {
+	int hsz;
+	if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M')
+		return stbi__errpuc("not BMP", "Corrupt BMP");
+	stbi__get32le(s); // discard filesize
+	stbi__get16le(s); // discard reserved
+	stbi__get16le(s); // discard reserved
+	info->offset = stbi__get32le(s);
+	info->hsz = hsz = stbi__get32le(s);
+	info->mr = info->mg = info->mb = info->ma = 0;
+	// 14 = bytes read above before the info header size field (2+4+2+2+4)
+	info->extra_read = 14;
+	if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124)
+		return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
+	// the 12-byte header stores 16-bit dimensions, all others 32-bit
+	if (hsz == 12) {
+		s->img_x = stbi__get16le(s);
+		s->img_y = stbi__get16le(s);
+	} else {
+		s->img_x = stbi__get32le(s);
+		s->img_y = stbi__get32le(s);
+	}
+	// next 16-bit field must be 1
+	if (stbi__get16le(s) != 1)
+		return stbi__errpuc("bad BMP", "bad BMP");
+	info->bpp = stbi__get16le(s);
+	if (hsz != 12) {
+		int compress = stbi__get32le(s);
+		if (compress == 1 || compress == 2)
+			return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
+		stbi__get32le(s); // discard sizeof
+		stbi__get32le(s); // discard hres
+		stbi__get32le(s); // discard vres
+		stbi__get32le(s); // discard colorsused
+		stbi__get32le(s); // discard max important
+		if (hsz == 40 || hsz == 56) {
+			if (hsz == 56) {
+				// skip the four extra 32-bit fields of the 56-byte header
+				stbi__get32le(s);
+				stbi__get32le(s);
+				stbi__get32le(s);
+				stbi__get32le(s);
+			}
+			if (info->bpp == 16 || info->bpp == 32) {
+				if (compress == 0) {
+					// no masks in the file: use fixed defaults
+					if (info->bpp == 32) {
+						info->mr = 0xffu << 16;
+						info->mg = 0xffu << 8;
+						info->mb = 0xffu << 0;
+						info->ma = 0xffu << 24;
+						info->all_a = 0; // if all_a is 0 at end, then we loaded
+										 // alpha channel but it was all 0
+					} else {
+						// 16bpp default: 5 bits per channel
+						info->mr = 31u << 10;
+						info->mg = 31u << 5;
+						info->mb = 31u << 0;
+					}
+				} else if (compress == 3) {
+					// compress == 3: three 32-bit channel masks follow
+					info->mr = stbi__get32le(s);
+					info->mg = stbi__get32le(s);
+					info->mb = stbi__get32le(s);
+					// these 12 mask bytes are not part of hsz
+					info->extra_read += 12;
+					// not documented, but generated by photoshop and handled by
+					// mspaint
+					if (info->mr == info->mg && info->mg == info->mb) {
+						// ?!?!?
+						return stbi__errpuc("bad BMP", "bad BMP");
+					}
+				} else
+					return stbi__errpuc("bad BMP", "bad BMP");
+			}
+		} else {
+			int i;
+			if (hsz != 108 && hsz != 124)
+				return stbi__errpuc("bad BMP", "bad BMP");
+			// 108/124-byte headers always carry four channel masks
+			info->mr = stbi__get32le(s);
+			info->mg = stbi__get32le(s);
+			info->mb = stbi__get32le(s);
+			info->ma = stbi__get32le(s);
+			stbi__get32le(s); // discard color space
+			for (i = 0; i < 12; ++i)
+				stbi__get32le(s); // discard color space parameters
+			if (hsz == 124) {
+				stbi__get32le(s); // discard rendering intent
+				stbi__get32le(s); // discard offset of profile data
+				stbi__get32le(s); // discard size of profile data
+				stbi__get32le(s); // discard reserved
+			}
+		}
+	}
+	return (void*)1;
+}

+// Decode a BMP image into a freshly allocated buffer of 8-bit samples.
+//   s        - input stream positioned at the start of the file
+//   x, y     - receive the image width and height
+//   comp     - if non-NULL, receives the component count found in the file
+//   req_comp - requested component count, or 0 to keep the file's own
+//   ri       - unused
+// Returns the pixel buffer, or the value of stbi__errpuc() on failure (the
+// buffer is freed on every error path in this function).
+// Supports 1/4/8 bpp paletted data and 16/24/32 bpp direct colour; for the
+// bit-field formats every channel mask must cover at most 8 bits.
+static void* stbi__bmp_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri) {
+	stbi_uc* out;
+	unsigned int mr = 0, mg = 0, mb = 0, ma = 0, all_a;
+	stbi_uc pal[256][4];
+	int psize = 0, i, j, width;
+	int flip_vertically, pad, target;
+	stbi__bmp_data info;
+	STBI_NOTUSED(ri);
+	// the header parser only overwrites all_a for uncompressed 32bpp files
+	info.all_a = 255;
+	if (stbi__bmp_parse_header(s, &info) == NULL)
+		return NULL; // error code already set
+	// a positive stored height means the rows must be flipped after decoding
+	flip_vertically = ((int)s->img_y) > 0;
+	s->img_y = abs((int)s->img_y);
+	mr = info.mr;
+	mg = info.mg;
+	mb = info.mb;
+	ma = info.ma;
+	all_a = info.all_a;
+	// number of palette entries that fit between the headers and the pixels
+	if (info.hsz == 12) {
+		if (info.bpp < 24)
+			psize = (info.offset - info.extra_read - 24) / 3;
+	} else {
+		if (info.bpp < 16)
+			psize = (info.offset - info.extra_read - info.hsz) >> 2;
+	}
+	if (psize == 0) {
+		STBI_ASSERT(info.offset == (s->img_buffer - s->buffer_start));
+	}
+	if (info.bpp == 24 && ma == 0xff000000)
+		s->img_n = 3;
+	else
+		s->img_n = ma ? 4 : 3;
+	if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
+		target = req_comp;
+	else
+		target = s->img_n; // if they want monochrome, we'll post-convert
+	// sanity-check size
+	if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
+		return stbi__errpuc("too large", "Corrupt BMP");
+	out = (stbi_uc*)stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
+	if (!out)
+		return stbi__errpuc("outofmem", "Out of memory");
+	if (info.bpp < 16) {
+		// ---- paletted image ----
+		int z = 0;
+		if (psize == 0 || psize > 256) {
+			STBI_FREE(out);
+			return stbi__errpuc("invalid", "Corrupt BMP");
+		}
+		// palette entries are stored blue, green, red; 4-byte entries carry
+		// one extra byte that is skipped
+		for (i = 0; i < psize; ++i) {
+			pal[i][2] = stbi__get8(s);
+			pal[i][1] = stbi__get8(s);
+			pal[i][0] = stbi__get8(s);
+			if (info.hsz != 12)
+				stbi__get8(s);
+			pal[i][3] = 255;
+		}
+		stbi__skip(s, info.offset - info.extra_read - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
+		// bytes of pixel data per row, before padding
+		if (info.bpp == 1)
+			width = (s->img_x + 7) >> 3;
+		else if (info.bpp == 4)
+			width = (s->img_x + 1) >> 1;
+		else if (info.bpp == 8)
+			width = s->img_x;
+		else {
+			STBI_FREE(out);
+			return stbi__errpuc("bad bpp", "Corrupt BMP");
+		}
+		// rows are padded to a multiple of 4 bytes
+		pad = (-width) & 3;
+		if (info.bpp == 1) {
+			for (j = 0; j < (int)s->img_y; ++j) {
+				int bit_offset = 7, v = stbi__get8(s);
+				for (i = 0; i < (int)s->img_x; ++i) {
+					int color = (v >> bit_offset) & 0x1;
+					out[z++] = pal[color][0];
+					out[z++] = pal[color][1];
+					out[z++] = pal[color][2];
+					if (target == 4)
+						out[z++] = 255;
+					// stop before fetching a byte past the end of the row
+					if (i + 1 == (int)s->img_x)
+						break;
+					if ((--bit_offset) < 0) {
+						bit_offset = 7;
+						v = stbi__get8(s);
+					}
+				}
+				stbi__skip(s, pad);
+			}
+		} else {
+			// 4 and 8 bpp: two pixels are produced per loop iteration
+			for (j = 0; j < (int)s->img_y; ++j) {
+				for (i = 0; i < (int)s->img_x; i += 2) {
+					int v = stbi__get8(s), v2 = 0;
+					if (info.bpp == 4) {
+						v2 = v & 15;
+						v >>= 4;
+					}
+					out[z++] = pal[v][0];
+					out[z++] = pal[v][1];
+					out[z++] = pal[v][2];
+					if (target == 4)
+						out[z++] = 255;
+					if (i + 1 == (int)s->img_x)
+						break;
+					v = (info.bpp == 8) ? stbi__get8(s) : v2;
+					out[z++] = pal[v][0];
+					out[z++] = pal[v][1];
+					out[z++] = pal[v][2];
+					if (target == 4)
+						out[z++] = 255;
+				}
+				stbi__skip(s, pad);
+			}
+		}
+	} else {
+		// ---- direct colour image (16, 24 or 32 bpp) ----
+		int rshift = 0, gshift = 0, bshift = 0, ashift = 0, rcount = 0, gcount = 0, bcount = 0, acount = 0;
+		int z = 0;
+		int easy = 0;
+		stbi__skip(s, info.offset - info.extra_read - info.hsz);
+		if (info.bpp == 24)
+			width = 3 * s->img_x;
+		else if (info.bpp == 16)
+			width = 2 * s->img_x;
+		else /* bpp = 32 and pad = 0 */
+			width = 0;
+		pad = (-width) & 3;
+		// easy == 1: plain 24bpp BGR; easy == 2: 32bpp with the default
+		// byte-aligned masks. Both are read byte by byte.
+		if (info.bpp == 24) {
+			easy = 1;
+		} else if (info.bpp == 32) {
+			if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
+				easy = 2;
+		}
+		if (!easy) {
+			if (!mr || !mg || !mb) {
+				STBI_FREE(out);
+				return stbi__errpuc("bad masks", "Corrupt BMP");
+			}
+			// right shift amt to put high bit in position #7
+			rshift = stbi__high_bit(mr) - 7;
+			rcount = stbi__bitcount(mr);
+			gshift = stbi__high_bit(mg) - 7;
+			gcount = stbi__bitcount(mg);
+			bshift = stbi__high_bit(mb) - 7;
+			bcount = stbi__bitcount(mb);
+			ashift = stbi__high_bit(ma) - 7;
+			acount = stbi__bitcount(ma);
+			// stbi__shiftsigned() indexes 9-entry tables with the bit count
+			// and shifts by (8 - bits), so a mask wider than 8 bits from a
+			// crafted file would read out of bounds and shift by a negative
+			// amount. Reject such masks up front.
+			if (rcount > 8 || gcount > 8 || bcount > 8 || acount > 8) {
+				STBI_FREE(out);
+				return stbi__errpuc("bad masks", "Corrupt BMP");
+			}
+		}
+		for (j = 0; j < (int)s->img_y; ++j) {
+			if (easy) {
+				for (i = 0; i < (int)s->img_x; ++i) {
+					unsigned char a;
+					// file order is B, G, R; store as R, G, B
+					out[z + 2] = stbi__get8(s);
+					out[z + 1] = stbi__get8(s);
+					out[z + 0] = stbi__get8(s);
+					z += 3;
+					a = (easy == 2 ? stbi__get8(s) : 255);
+					all_a |= a;
+					if (target == 4)
+						out[z++] = a;
+				}
+			} else {
+				int bpp = info.bpp;
+				for (i = 0; i < (int)s->img_x; ++i) {
+					stbi__uint32 v = (bpp == 16 ? (stbi__uint32)stbi__get16le(s) : stbi__get32le(s));
+					unsigned int a;
+					out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
+					out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
+					out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
+					a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
+					all_a |= a;
+					if (target == 4)
+						out[z++] = STBI__BYTECAST(a);
+				}
+			}
+			stbi__skip(s, pad);
+		}
+	}
+	// if alpha channel is all 0s, replace with all 255s
+	if (target == 4 && all_a == 0)
+		for (i = 4 * s->img_x * s->img_y - 1; i >= 0; i -= 4)
+			out[i] = 255;
+	if (flip_vertically) {
+		// swap row j with row (height - 1 - j), byte by byte
+		stbi_uc t;
+		for (j = 0; j < (int)s->img_y >> 1; ++j) {
+			stbi_uc* p1 = out + j * s->img_x * target;
+			stbi_uc* p2 = out + (s->img_y - 1 - j) * s->img_x * target;
+			for (i = 0; i < (int)s->img_x * target; ++i) {
+				t = p1[i];
+				p1[i] = p2[i];
+				p2[i] = t;
+			}
+		}
+	}
+	if (req_comp && req_comp != target) {
+		out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
+		if (out == NULL)
+			return out; // stbi__convert_format frees input on failure
+	}
+	*x = s->img_x;
+	*y = s->img_y;
+	if (comp)
+		*comp = s->img_n;
+	return out;
+}

+#endif

+// Targa Truevision - TGA

+// by Jonathan Dummer

+#ifndef STBI_NO_TGA

+// Map a TGA pixel (or palette entry) bit depth to a component count.
+//   bits_per_pixel - depth taken from the header
+//   is_grey        - non-zero for greyscale image types
+//   is_rgb16       - optional out flag: set to 1 when the data is packed
+//                    15/16-bit RGB, 0 otherwise
+// Returns STBI_grey, STBI_grey_alpha, STBI_rgb, or bits_per_pixel / 8 for
+// 24 and 32 bit data; returns 0 for any unsupported depth.
+static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16) {
+	// only RGB or RGBA (incl. 16bit) or grey allowed
+	if (is_rgb16)
+		*is_rgb16 = 0;
+	if (bits_per_pixel == 8)
+		return STBI_grey;
+	if (bits_per_pixel == 16 && is_grey)
+		return STBI_grey_alpha;
+	if (bits_per_pixel == 15 || bits_per_pixel == 16) {
+		// non-grey 16-bit data is handled exactly like 15-bit data
+		if (is_rgb16)
+			*is_rgb16 = 1;
+		return STBI_rgb;
+	}
+	if (bits_per_pixel == 24 || bits_per_pixel == 32)
+		return bits_per_pixel / 8;
+	return 0;
+}

+// Read just enough of a TGA header to report the image dimensions and
+// component count without decoding pixels.
+//   x, y, comp - optional outputs (each may be NULL)
+// Returns 1 on success. Returns 0 after rewinding the stream when the header
+// is not an accepted TGA variant. The stream is NOT rewound on success.
+static int stbi__tga_info(stbi__context* s, int* x, int* y, int* comp) {
+	int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp;
+	int sz, tga_colormap_type;
+	stbi__get8(s);					   // discard Offset
+	tga_colormap_type = stbi__get8(s); // colormap type
+	if (tga_colormap_type > 1) {
+		stbi__rewind(s);
+		return 0; // only RGB or indexed allowed
+	}
+	tga_image_type = stbi__get8(s); // image type
+	if (tga_colormap_type == 1) {   // colormapped (paletted) image
+		// colormapped data requires image type 1 or 9
+		if (tga_image_type != 1 && tga_image_type != 9) {
+			stbi__rewind(s);
+			return 0;
+		}
+		stbi__skip(s,
+				   4);		// skip index of first colormap entry and number of entries
+		sz = stbi__get8(s); //   check bits per palette color entry
+		if ((sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32)) {
+			stbi__rewind(s);
+			return 0;
+		}
+		stbi__skip(s, 4); // skip image x and y origin
+		tga_colormap_bpp = sz;
+	} else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
+		if ((tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11)) {
+			stbi__rewind(s);
+			return 0; // only RGB or grey allowed, +/- RLE
+		}
+		stbi__skip(s, 9); // skip colormap specification and image x/y origin
+		tga_colormap_bpp = 0; // 0 marks "no colormap" for the checks below
+	}
+	tga_w = stbi__get16le(s);
+	if (tga_w < 1) {
+		stbi__rewind(s);
+		return 0; // test width
+	}
+	tga_h = stbi__get16le(s);
+	if (tga_h < 1) {
+		stbi__rewind(s);
+		return 0; // test height
+	}
+	tga_bits_per_pixel = stbi__get8(s); // bits per pixel
+	stbi__get8(s);						// ignore alpha bits
+	if (tga_colormap_bpp != 0) {
+		if ((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) {
+			// when using a colormap, tga_bits_per_pixel is the size of the
+			// indexes I don't think anything but 8 or 16bit indexes makes sense
+			stbi__rewind(s);
+			return 0;
+		}
+		// component count comes from the palette entry depth
+		tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL);
+	} else {
+		// image types 3 and 11 are treated as greyscale
+		tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL);
+	}
+	if (!tga_comp) {
+		stbi__rewind(s);
+		return 0;
+	}
+	if (x)
+		*x = tga_w;
+	if (y)
+		*y = tga_h;
+	if (comp)
+		*comp = tga_comp;
+	return 1; // seems to have passed everything
+}

+// Check whether the stream looks like a TGA file this loader accepts.
+// Returns 1 if every header check passes and 0 otherwise; the stream is
+// rewound before returning in both cases.
+static int stbi__tga_test(stbi__context* s) {
+	int ok = 0;
+	int field, colormap_type;
+	// single-pass block: any failed check breaks out to the common rewind
+	do {
+		stbi__get8(s);				   // discard Offset
+		colormap_type = stbi__get8(s); // color type
+		if (colormap_type > 1)
+			break;			   // only RGB or indexed allowed
+		field = stbi__get8(s); // image type
+		if (colormap_type == 1) {
+			// colormapped (paletted) image: demands image type 1 or 9
+			if (!(field == 1 || field == 9))
+				break;
+			stbi__skip(s, 4);	   // skip index of first colormap entry and number of entries
+			field = stbi__get8(s); // bits per palette color entry
+			if (!(field == 8 || field == 15 || field == 16 || field == 24 || field == 32))
+				break;
+			stbi__skip(s, 4); // skip image x and y origin
+		} else {
+			// "normal" image w/o colormap: only RGB or grey allowed, +/- RLE
+			if (!(field == 2 || field == 3 || field == 10 || field == 11))
+				break;
+			stbi__skip(s, 9); // skip colormap specification and image x/y origin
+		}
+		if (stbi__get16le(s) < 1)
+			break; // test width
+		if (stbi__get16le(s) < 1)
+			break;			   // test height
+		field = stbi__get8(s); // bits per pixel
+		// for colormapped images, bpp is the size of an index
+		if (colormap_type == 1 && field != 8 && field != 16)
+			break;
+		if (!(field == 8 || field == 15 || field == 16 || field == 24 || field == 32))
+			break;
+		ok = 1; // every check passed
+	} while (0);
+	stbi__rewind(s);
+	return ok;
+}

+// Read one little-endian 16-bit pixel and expand it to three 8-bit values.
+// The pixel holds three 5-bit channels (bits 14..10, 9..5 and 4..0), which
+// are written to out[0], out[1] and out[2] in that order, i.e. already in
+// RGB order, so no later swap is needed. Each 5-bit level is scaled from
+// 0..31 to 0..255. The top bit is ignored: 15 and 16 bit TGAs are treated
+// as RGB with no alpha.
+static void stbi__tga_read_rgb16(stbi__context* s, stbi_uc* out) {
+	const stbi__uint16 packed = (stbi__uint16)stbi__get16le(s);
+	int shift = 10;
+	int c;
+	for (c = 0; c < 3; ++c, shift -= 5) {
+		int level = (packed >> shift) & 31;
+		out[c] = (stbi_uc)((level * 255) / 31);
+	}
+}

+// Decode a TGA image (true colour, greyscale or colour-mapped, optionally
+// RLE compressed) into a freshly allocated 8-bit-per-channel buffer.
+//   s        - input stream positioned at the start of the file
+//   x, y     - receive the image width and height
+//   comp     - if non-NULL, receives the component count found in the file
+//   req_comp - requested component count, or 0 to keep the file's own
+//   ri       - unused
+// Returns the pixel buffer, or the value of stbi__errpuc() on failure.
+// A colour-mapped image whose palette has zero entries is rejected as
+// corrupt, because every pixel lookup would otherwise read past the end of
+// the empty palette allocation.
+static void* stbi__tga_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri) {
+	//   read in the TGA header stuff
+	int tga_offset = stbi__get8(s);
+	int tga_indexed = stbi__get8(s);
+	int tga_image_type = stbi__get8(s);
+	int tga_is_RLE = 0;
+	int tga_palette_start = stbi__get16le(s);
+	int tga_palette_len = stbi__get16le(s);
+	int tga_palette_bits = stbi__get8(s);
+	int tga_x_origin = stbi__get16le(s);
+	int tga_y_origin = stbi__get16le(s);
+	int tga_width = stbi__get16le(s);
+	int tga_height = stbi__get16le(s);
+	int tga_bits_per_pixel = stbi__get8(s);
+	int tga_comp, tga_rgb16 = 0;
+	int tga_inverted = stbi__get8(s);
+	// int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused
+	// (useless?)
+	//   image data
+	unsigned char* tga_data;
+	unsigned char* tga_palette = NULL;
+	int i, j;
+	unsigned char raw_data[4] = {0};
+	int RLE_count = 0;
+	int RLE_repeating = 0;
+	int read_next_pixel = 1;
+	STBI_NOTUSED(ri);
+	STBI_NOTUSED(tga_x_origin); // @TODO
+	STBI_NOTUSED(tga_y_origin); // @TODO
+	//   do a tiny bit of processing
+	// image types 8 and above are the RLE variants of types 0..3
+	if (tga_image_type >= 8) {
+		tga_image_type -= 8;
+		tga_is_RLE = 1;
+	}
+	// bit 5 of the descriptor byte clear => rows must be flipped afterwards
+	tga_inverted = 1 - ((tga_inverted >> 5) & 1);
+	//   If I'm paletted, then I'll use the number of bits from the palette
+	if (tga_indexed)
+		tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
+	else
+		tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);
+	if (!tga_comp) // shouldn't really happen, stbi__tga_test() should have
+				   // ensured basic consistency
+		return stbi__errpuc("bad format", "Can't find out TGA pixelformat");
+	//   tga info
+	*x = tga_width;
+	*y = tga_height;
+	if (comp)
+		*comp = tga_comp;
+	if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
+		return stbi__errpuc("too large", "Corrupt TGA");
+	tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
+	if (!tga_data)
+		return stbi__errpuc("outofmem", "Out of memory");
+	// skip to the data's starting position (offset usually = 0)
+	stbi__skip(s, tga_offset);
+	if (!tga_indexed && !tga_is_RLE && !tga_rgb16) {
+		// fast path: raw rows can be read straight into their final position
+		for (i = 0; i < tga_height; ++i) {
+			int row = tga_inverted ? tga_height - i - 1 : i;
+			stbi_uc* tga_row = tga_data + row * tga_width * tga_comp;
+			stbi__getn(s, tga_row, tga_width * tga_comp);
+		}
+	} else {
+		//   do I need to load a palette?
+		if (tga_indexed) {
+			// an indexed image needs at least one palette entry: invalid
+			// indices fall back to entry 0, which must therefore exist
+			if (tga_palette_len == 0) {
+				STBI_FREE(tga_data);
+				return stbi__errpuc("bad palette", "Corrupt TGA");
+			}
+			//   any data to skip? (offset usually = 0)
+			stbi__skip(s, tga_palette_start);
+			//   load the palette
+			tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
+			if (!tga_palette) {
+				STBI_FREE(tga_data);
+				return stbi__errpuc("outofmem", "Out of memory");
+			}
+			if (tga_rgb16) {
+				// 15/16-bit palette entries are expanded to 3 bytes each
+				stbi_uc* pal_entry = tga_palette;
+				STBI_ASSERT(tga_comp == STBI_rgb);
+				for (i = 0; i < tga_palette_len; ++i) {
+					stbi__tga_read_rgb16(s, pal_entry);
+					pal_entry += tga_comp;
+				}
+			} else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
+				STBI_FREE(tga_data);
+				STBI_FREE(tga_palette);
+				return stbi__errpuc("bad palette", "Corrupt TGA");
+			}
+		}
+		//   load the data
+		for (i = 0; i < tga_width * tga_height; ++i) {
+			//   if I'm in RLE mode, do I need to get a RLE chunk?
+			if (tga_is_RLE) {
+				if (RLE_count == 0) {
+					//   yep, get the next byte as a RLE command
+					int RLE_cmd = stbi__get8(s);
+					RLE_count = 1 + (RLE_cmd & 127); // low 7 bits: run length - 1
+					RLE_repeating = RLE_cmd >> 7;	 // high bit: repeat one pixel
+					read_next_pixel = 1;
+				} else if (!RLE_repeating) {
+					// literal run: every pixel is stored
+					read_next_pixel = 1;
+				}
+			} else {
+				read_next_pixel = 1;
+			}
+			//   OK, if I need to read a pixel, do it now
+			if (read_next_pixel) {
+				//   load however much data we did have
+				if (tga_indexed) {
+					// read in index, then perform the lookup
+					int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
+					if (pal_idx >= tga_palette_len) {
+						// invalid index
+						pal_idx = 0;
+					}
+					pal_idx *= tga_comp;
+					for (j = 0; j < tga_comp; ++j) {
+						raw_data[j] = tga_palette[pal_idx + j];
+					}
+				} else if (tga_rgb16) {
+					STBI_ASSERT(tga_comp == STBI_rgb);
+					stbi__tga_read_rgb16(s, raw_data);
+				} else {
+					//   read in the data raw
+					for (j = 0; j < tga_comp; ++j) {
+						raw_data[j] = stbi__get8(s);
+					}
+				}
+				//   clear the reading flag for the next pixel
+				read_next_pixel = 0;
+			} // end of reading a pixel
+			// copy data
+			for (j = 0; j < tga_comp; ++j)
+				tga_data[i * tga_comp + j] = raw_data[j];
+			//   in case we're in RLE mode, keep counting down
+			--RLE_count;
+		}
+		//   do I need to invert the image?
+		if (tga_inverted) {
+			// swap row j with row (height - 1 - j), byte by byte
+			for (j = 0; j * 2 < tga_height; ++j) {
+				int index1 = j * tga_width * tga_comp;
+				int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
+				for (i = tga_width * tga_comp; i > 0; --i) {
+					unsigned char temp = tga_data[index1];
+					tga_data[index1] = tga_data[index2];
+					tga_data[index2] = temp;
+					++index1;
+					++index2;
+				}
+			}
+		}
+		//   clear my palette, if I had one
+		if (tga_palette != NULL) {
+			STBI_FREE(tga_palette);
+		}
+	}
+	// swap RGB - if the source data was RGB16, it already is in the right order
+	if (tga_comp >= 3 && !tga_rgb16) {
+		unsigned char* tga_pixel = tga_data;
+		for (i = 0; i < tga_width * tga_height; ++i) {
+			unsigned char temp = tga_pixel[0];
+			tga_pixel[0] = tga_pixel[2];
+			tga_pixel[2] = temp;
+			tga_pixel += tga_comp;
+		}
+	}
+	// convert to target component count
+	if (req_comp && req_comp != tga_comp)
+		tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);
+	//   the things I do to get rid of an error message, and yet keep
+	//   Microsoft's C compilers happy... [8^(
+	tga_palette_start = tga_palette_len = tga_palette_bits = tga_x_origin = tga_y_origin = 0;
+	STBI_NOTUSED(tga_palette_start);
+	//   OK, done
+	return tga_data;
+}

+#endif

+// *************************************************************************************************

+// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz,

+// tweaked by STB

+#ifndef STBI_NO_PSD

+// Returns 1 when the stream begins with the big-endian 32-bit value
+// 0x38425053 ("8BPS"), 0 otherwise. The stream is rewound either way.
+static int stbi__psd_test(stbi__context* s) {
+	const int has_signature = (stbi__get32be(s) == 0x38425053);
+	stbi__rewind(s);
+	return has_signature;
+}

+// Decode one RLE-compressed channel of `pixelCount` samples.
+// Output samples are written through `p` with a stride of 4 bytes, so `p`
+// should point at the channel's slot in the first interleaved pixel.
+// Each run starts with a control byte:
+//   128      - no-op
+//   0..127   - copy the next (byte + 1) source bytes literally
+//   129..255 - repeat the next source byte (257 - byte) times
+// Returns 1 on success, 0 if a run would exceed the remaining sample count.
+static int stbi__psd_decode_rle(stbi__context* s, stbi_uc* p, int pixelCount) {
+	int remaining = pixelCount;
+	while (remaining > 0) {
+		int run, k;
+		int code = stbi__get8(s);
+		if (code == 128)
+			continue; // no-op
+		if (code < 128) {
+			// literal run
+			run = code + 1;
+			if (run > remaining)
+				return 0; // corrupt data
+			for (k = 0; k < run; ++k, p += 4)
+				*p = stbi__get8(s);
+		} else {
+			// replicated run (control byte read as a negative 8-bit count)
+			stbi_uc fill;
+			run = 257 - code;
+			if (run > remaining)
+				return 0; // corrupt data
+			fill = stbi__get8(s);
+			for (k = 0; k < run; ++k, p += 4)
+				*p = fill;
+		}
+		remaining -= run;
+	}
+	return 1;
+}

+// Decode the composite image of an RGB-mode PSD file into an interleaved
+// 4-channel buffer.
+//   s        - input stream positioned at the start of the file
+//   x, y     - receive the image width and height
+//   comp     - if non-NULL, always set to 4
+//   req_comp - requested component count, or 0 to keep 4
+//   ri       - result info; bits_per_channel is set to 16 when 16-bit output
+//              is produced and is read back to choose the output sample size
+//   bpc      - bits per channel the caller wants (16 enables 16-bit output
+//              for uncompressed 16-bit files)
+// Returns the pixel buffer, or the value of stbi__errpuc() on failure.
+// NOTE(review): ri is marked STBI_NOTUSED below but is dereferenced; the
+// 8-bit paths read ri->bits_per_channel without writing it here, so the
+// caller presumably initialises it — confirm.
+static void* stbi__psd_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri, int bpc) {
+	int pixelCount;
+	int channelCount, compression;
+	int channel, i;
+	int bitdepth;
+	int w, h;
+	stbi_uc* out;
+	STBI_NOTUSED(ri);
+	// Check identifier
+	if (stbi__get32be(s) != 0x38425053) // "8BPS"
+		return stbi__errpuc("not PSD", "Corrupt PSD image");
+	// Check file type version.
+	if (stbi__get16be(s) != 1)
+		return stbi__errpuc("wrong version", "Unsupported version of PSD image");
+	// Skip 6 reserved bytes.
+	stbi__skip(s, 6);
+	// Read the number of channels (R, G, B, A, etc).
+	channelCount = stbi__get16be(s);
+	if (channelCount < 0 || channelCount > 16)
+		return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");
+	// Read the rows and columns of the image.
+	h = stbi__get32be(s);
+	w = stbi__get32be(s);
+	// Make sure the depth is 8 or 16 bits.
+	bitdepth = stbi__get16be(s);
+	if (bitdepth != 8 && bitdepth != 16)
+		return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");
+	// Make sure the color mode is RGB.
+	// Valid options are:
+	//   0: Bitmap
+	//   1: Grayscale
+	//   2: Indexed color
+	//   3: RGB color
+	//   4: CMYK color
+	//   7: Multichannel
+	//   8: Duotone
+	//   9: Lab color
+	if (stbi__get16be(s) != 3)
+		return stbi__errpuc("wrong color format", "PSD is not in RGB color format");
+	// Skip the Mode Data.  (It's the palette for indexed color; other info for
+	// other modes.)
+	stbi__skip(s, stbi__get32be(s));
+	// Skip the image resources.  (resolution, pen tool paths, etc)
+	stbi__skip(s, stbi__get32be(s));
+	// Skip the reserved data.
+	stbi__skip(s, stbi__get32be(s));
+	// Find out if the data is compressed.
+	// Known values:
+	//   0: no compression
+	//   1: RLE compressed
+	compression = stbi__get16be(s);
+	if (compression > 1)
+		return stbi__errpuc("bad compression", "PSD has an unknown compression format");
+	// Check size
+	if (!stbi__mad3sizes_valid(4, w, h, 0))
+		return stbi__errpuc("too large", "Corrupt PSD");
+	// Create the destination image.
+	// 16-bit output is only produced for uncompressed 16-bit files when the
+	// caller asked for it; everything else decodes to 8 bits per channel.
+	if (!compression && bitdepth == 16 && bpc == 16) {
+		out = (stbi_uc*)stbi__malloc_mad3(8, w, h, 0);
+		ri->bits_per_channel = 16;
+	} else
+		out = (stbi_uc*)stbi__malloc(4 * w * h);
+	if (!out)
+		return stbi__errpuc("outofmem", "Out of memory");
+	pixelCount = w * h;
+	// Initialize the data to zero.
+	// memset( out, 0, pixelCount * 4 );
+	// Finally, the image data.
+	if (compression) {
+		// RLE as used by .PSD and .TIFF
+		// Loop until you get the number of unpacked bytes you are expecting:
+		//     Read the next source byte into n.
+		//     If n is between 0 and 127 inclusive, copy the next n+1 bytes
+		//     literally. Else if n is between -127 and -1 inclusive, copy the
+		//     next byte -n+1 times. Else if n is 128, noop.
+		// Endloop
+		// The RLE-compressed data is preceded by a 2-byte data count for each
+		// row in the data, which we're going to just skip.
+		stbi__skip(s, h * channelCount * 2);
+		// Read the RLE data by channel.
+		for (channel = 0; channel < 4; channel++) {
+			stbi_uc* p;
+			p = out + channel;
+			if (channel >= channelCount) {
+				// Fill this channel with default data.
+				// (alpha defaults to opaque, colour channels to 0)
+				for (i = 0; i < pixelCount; i++, p += 4)
+					*p = (channel == 3 ? 255 : 0);
+			} else {
+				// Read the RLE data.
+				if (!stbi__psd_decode_rle(s, p, pixelCount)) {
+					STBI_FREE(out);
+					return stbi__errpuc("corrupt", "bad RLE data");
+				}
+			}
+		}
+	} else {
+		// We're at the raw image data.  It's each channel in order (Red, Green,
+		// Blue, Alpha, ...) where each channel consists of an 8-bit (or 16-bit)
+		// value for each pixel in the image.
+		// Read the data by channel.
+		for (channel = 0; channel < 4; channel++) {
+			if (channel >= channelCount) {
+				// Fill this channel with default data.
+				if (bitdepth == 16 && bpc == 16) {
+					stbi__uint16* q = ((stbi__uint16*)out) + channel;
+					stbi__uint16 val = channel == 3 ? 65535 : 0;
+					for (i = 0; i < pixelCount; i++, q += 4)
+						*q = val;
+				} else {
+					stbi_uc* p = out + channel;
+					stbi_uc val = channel == 3 ? 255 : 0;
+					for (i = 0; i < pixelCount; i++, p += 4)
+						*p = val;
+				}
+			} else {
+				if (ri->bits_per_channel == 16) { // output bpc
+					stbi__uint16* q = ((stbi__uint16*)out) + channel;
+					for (i = 0; i < pixelCount; i++, q += 4)
+						*q = (stbi__uint16)stbi__get16be(s);
+				} else {
+					stbi_uc* p = out + channel;
+					if (bitdepth == 16) { // input bpc
+						// 16-bit input, 8-bit output: keep the high byte
+						for (i = 0; i < pixelCount; i++, p += 4)
+							*p = (stbi_uc)(stbi__get16be(s) >> 8);
+					} else {
+						for (i = 0; i < pixelCount; i++, p += 4)
+							*p = stbi__get8(s);
+					}
+				}
+			}
+		}
+	}
+	// remove weird white matte from PSD
+	// For partially transparent pixels each colour value c becomes
+	// c / a + max * (1 - 1 / a), where a is alpha scaled to 0..1.
+	if (channelCount >= 4) {
+		if (ri->bits_per_channel == 16) {
+			for (i = 0; i < w * h; ++i) {
+				stbi__uint16* pixel = (stbi__uint16*)out + 4 * i;
+				if (pixel[3] != 0 && pixel[3] != 65535) {
+					float a = pixel[3] / 65535.0f;
+					float ra = 1.0f / a;
+					float inv_a = 65535.0f * (1 - ra);
+					pixel[0] = (stbi__uint16)(pixel[0] * ra + inv_a);
+					pixel[1] = (stbi__uint16)(pixel[1] * ra + inv_a);
+					pixel[2] = (stbi__uint16)(pixel[2] * ra + inv_a);
+				}
+			}
+		} else {
+			for (i = 0; i < w * h; ++i) {
+				unsigned char* pixel = out + 4 * i;
+				if (pixel[3] != 0 && pixel[3] != 255) {
+					float a = pixel[3] / 255.0f;
+					float ra = 1.0f / a;
+					float inv_a = 255.0f * (1 - ra);
+					pixel[0] = (unsigned char)(pixel[0] * ra + inv_a);
+					pixel[1] = (unsigned char)(pixel[1] * ra + inv_a);
+					pixel[2] = (unsigned char)(pixel[2] * ra + inv_a);
+				}
+			}
+		}
+	}
+	// convert to desired output format
+	if (req_comp && req_comp != 4) {
+		if (ri->bits_per_channel == 16)
+			out = (stbi_uc*)stbi__convert_format16((stbi__uint16*)out, 4, req_comp, w, h);
+		else
+			out = stbi__convert_format(out, 4, req_comp, w, h);
+		if (out == NULL)
+			return out; // stbi__convert_format frees input on failure
+	}
+	if (comp)
+		*comp = 4;
+	*y = h;
+	*x = w;
+	return out;
+}

+#endif

+// *************************************************************************************************

+// Softimage PIC loader

+// by Tom Seddon

+//

+// See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format

+// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/

+#ifndef STBI_NO_PIC

+// Read up to 4 bytes from the stream and compare them with str[0..3].
+// Returns 1 if all four match; returns 0 at the first mismatch, leaving the
+// remaining bytes unread.
+static int stbi__pic_is4(stbi__context* s, const char* str) {
+	const char* const end = str + 4;
+	while (str != end) {
+		if (stbi__get8(s) != (stbi_uc)*str)
+			return 0;
+		++str;
+	}
+	return 1;
+}

+// Check the PIC signature: the 4 magic bytes 53 80 F6 34, then 84 bytes
+// that are skipped, then the tag "PICT". Returns 1 on a match, 0 otherwise.
+// Does not rewind the stream.
+static int stbi__pic_test_core(stbi__context* s) {
+	int remaining;
+	if (!stbi__pic_is4(s, "\x53\x80\xF6\x34"))
+		return 0;
+	for (remaining = 84; remaining > 0; --remaining)
+		stbi__get8(s);
+	return stbi__pic_is4(s, "PICT") ? 1 : 0;
+}

+// One channel packet descriptor from a PIC header, as read by
+// stbi__pic_load_core().
+typedef struct {
+	stbi_uc size;	 // bits per channel value; the loader only accepts 8
+	stbi_uc type;	 // compression: 0 = uncompressed, 1 = pure RLE, 2 = mixed RLE
+	stbi_uc channel; // bit mask of the channels carried (0x80 is the first)
+} stbi__pic_packet;

+// Read one byte from the stream for every channel selected in `channel`
+// and store it in the matching slot of dest[0..3]. Bit 0x80 selects
+// dest[0], 0x40 dest[1], 0x20 dest[2] and 0x10 dest[3]; unselected slots are
+// left untouched.
+// Returns dest, or the value of stbi__errpuc() if the stream is at EOF
+// before a selected byte is read.
+static stbi_uc* stbi__readval(stbi__context* s, int channel, stbi_uc* dest) {
+	int slot;
+	for (slot = 0; slot < 4; ++slot) {
+		if (!(channel & (0x80 >> slot)))
+			continue;
+		if (stbi__at_eof(s))
+			return stbi__errpuc("bad file", "PIC file too short");
+		dest[slot] = stbi__get8(s);
+	}
+	return dest;
+}

+// Copy src[i] to dest[i] for each of the four slots selected in `channel`
+// (bit 0x80 selects slot 0, down to bit 0x10 for slot 3). Other slots of
+// dest are left unchanged.
+static void stbi__copyval(int channel, stbi_uc* dest, const stbi_uc* src) {
+	int slot;
+	for (slot = 0; slot < 4; ++slot) {
+		if (channel & (0x80 >> slot))
+			dest[slot] = src[slot];
+	}
+}

+// Decode the packet list and pixel data of a PIC image.
+//   width, height - image dimensions
+//   comp          - receives 4 if any packet carries channel bit 0x10
+//                   (the fourth slot), otherwise 3
+//   result        - caller-provided buffer of width * height * 4 bytes that
+//                   is filled in place
+// Returns `result` on success, or 0 / the value of stbi__errpuc() on
+// malformed input. The buffer is never freed here.
+static stbi_uc* stbi__pic_load_core(stbi__context* s, int width, int height, int* comp, stbi_uc* result) {
+	int act_comp = 0, num_packets = 0, y, chained;
+	stbi__pic_packet packets[10];
+	// this will (should...) cater for even some bizarre stuff like having data
+	// for the same channel in multiple packets.
+	// Read packet descriptors until one has its "chained" byte clear.
+	do {
+		stbi__pic_packet* packet;
+		if (num_packets == sizeof(packets) / sizeof(packets[0]))
+			return stbi__errpuc("bad format", "too many packets");
+		packet = &packets[num_packets++];
+		chained = stbi__get8(s);
+		packet->size = stbi__get8(s);
+		packet->type = stbi__get8(s);
+		packet->channel = stbi__get8(s);
+		// accumulate the union of all channel masks
+		act_comp |= packet->channel;
+		if (stbi__at_eof(s))
+			return stbi__errpuc("bad file", "file too short (reading packets)");
+		if (packet->size != 8)
+			return stbi__errpuc("bad format", "packet isn't 8bpp");
+	} while (chained);
+	*comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
+	// Each scanline is stored once per packet, in packet order.
+	for (y = 0; y < height; ++y) {
+		int packet_idx;
+		for (packet_idx = 0; packet_idx < num_packets; ++packet_idx) {
+			stbi__pic_packet* packet = &packets[packet_idx];
+			stbi_uc* dest = result + y * width * 4;
+			switch (packet->type) {
+			default:
+				return stbi__errpuc("bad format", "packet has bad compression type");
+			case 0: { // uncompressed
+				int x;
+				for (x = 0; x < width; ++x, dest += 4)
+					if (!stbi__readval(s, packet->channel, dest))
+						return 0;
+				break;
+			}
+			case 1: // Pure RLE
+			{
+				// every run is a count byte followed by one pixel value
+				int left = width, i;
+				while (left > 0) {
+					stbi_uc count, value[4];
+					count = stbi__get8(s);
+					if (stbi__at_eof(s))
+						return stbi__errpuc("bad file", "file too short (pure read count)");
+					// a run that overshoots the scanline is clipped
+					if (count > left)
+						count = (stbi_uc)left;
+					if (!stbi__readval(s, packet->channel, value))
+						return 0;
+					for (i = 0; i < count; ++i, dest += 4)
+						stbi__copyval(packet->channel, dest, value);
+					left -= count;
+				}
+			} break;
+			case 2: { // Mixed RLE
+				int left = width;
+				while (left > 0) {
+					int count = stbi__get8(s), i;
+					if (stbi__at_eof(s))
+						return stbi__errpuc("bad file", "file too short (mixed read count)");
+					if (count >= 128) { // Repeated
+						stbi_uc value[4];
+						// 128 means a 16-bit big-endian count follows;
+						// otherwise the run length is count - 127
+						if (count == 128)
+							count = stbi__get16be(s);
+						else
+							count -= 127;
+						if (count > left)
+							return stbi__errpuc("bad file", "scanline overrun");
+						if (!stbi__readval(s, packet->channel, value))
+							return 0;
+						for (i = 0; i < count; ++i, dest += 4)
+							stbi__copyval(packet->channel, dest, value);
+					} else { // Raw
+						// count + 1 pixels are stored literally
+						++count;
+						if (count > left)
+							return stbi__errpuc("bad file", "scanline overrun");
+						for (i = 0; i < count; ++i, dest += 4)
+							if (!stbi__readval(s, packet->channel, dest))
+								return 0;
+					}
+					left -= count;
+				}
+				break;
+			}
+			}
+		}
+	}
+	return result;
+}

+// Decode a Softimage PIC image.
+//   s        - input stream positioned at the start of the file
+//   px, py   - receive the image width and height
+//   comp     - if non-NULL, receives the component count found in the file
+//   req_comp - requested component count, or 0 to keep the file's own
+//   ri       - unused
+// Returns the pixel buffer, or 0 / the value of stbi__errpuc() on failure.
+// Pixels are decoded into an intermediate RGBA buffer that is pre-filled
+// with 0xff and then converted to the requested component count.
+static void* stbi__pic_load(stbi__context* s, int* px, int* py, int* comp, int req_comp, stbi__result_info* ri) {
+	stbi_uc* result;
+	int i, x, y, internal_comp;
+	STBI_NOTUSED(ri);
+	if (!comp)
+		comp = &internal_comp;
+	// skip the first 92 bytes of the header
+	for (i = 0; i < 92; ++i)
+		stbi__get8(s);
+	x = stbi__get16be(s);
+	y = stbi__get16be(s);
+	if (stbi__at_eof(s))
+		return stbi__errpuc("bad file", "file too short (pic header)");
+	if (!stbi__mad3sizes_valid(x, y, 4, 0))
+		return stbi__errpuc("too large", "PIC image too large to decode");
+	stbi__get32be(s); // skip `ratio'
+	stbi__get16be(s); // skip `fields'
+	stbi__get16be(s); // skip `pad'
+	// intermediate buffer is RGBA
+	result = (stbi_uc*)stbi__malloc_mad3(x, y, 4, 0);
+	// the allocation can fail; never hand a NULL buffer to memset
+	if (!result)
+		return stbi__errpuc("outofmem", "Out of memory");
+	memset(result, 0xff, x * y * 4);
+	// the dimensions are reported even when decoding fails below
+	*px = x;
+	*py = y;
+	if (!stbi__pic_load_core(s, x, y, comp, result)) {
+		// Stop here: the buffer is gone and *comp may not have been set, so
+		// running the format conversion would dereference a NULL image.
+		STBI_FREE(result);
+		return 0;
+	}
+	if (req_comp == 0)
+		req_comp = *comp;
+	result = stbi__convert_format(result, 4, req_comp, x, y);
+	return result;
+}

+// Report whether the stream passes stbi__pic_test_core; the stream is
+// rewound afterwards regardless of the outcome.
+static int stbi__pic_test(stbi__context* s) {
+	int is_pic;
+	is_pic = stbi__pic_test_core(s);
+	stbi__rewind(s);
+	return is_pic;
+}

+#endif

+// *************************************************************************************************

+// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb

+#ifndef STBI_NO_GIF

+// One entry of the GIF LZW code table.
+typedef struct {
+	stbi__int16 prefix; // code emitted before this one; negative for root codes (set to -1 at table init)
+	stbi_uc first;      // copied from the prefix entry's `first`; equals the code itself for root codes
+	stbi_uc suffix;     // palette index of the pixel this entry emits
+} stbi__gif_lzw;

+// Decoder state for one GIF stream, kept across frames.
+typedef struct {
+	int w, h;            // logical screen size read by stbi__gif_header
+	stbi_uc* out;		 // output buffer (always 4 components)
+	stbi_uc* background; // The current "background" as far as a gif is concerned
+	stbi_uc* history;    // one byte per pixel; set to 1 for pixels written by the current frame
+	int flags, bgindex, ratio, transparent, eflags; // header fields; transparent is -1 when unset; eflags comes from the graphic control extension
+	stbi_uc pal[256][4];  // palette filled by stbi__gif_header when flags & 0x80
+	stbi_uc lpal[256][4]; // palette filled per image descriptor when lflags & 0x80
+	stbi__gif_lzw codes[8192];
+	stbi_uc* color_table; // points at pal or lpal for the frame being decoded
+	int parse, step;      // interlace pass counter and current row advance (in bytes)
+	int lflags;           // flags byte of the current image descriptor
+	int start_x, start_y; // frame rectangle origin, as byte offsets into out
+	int max_x, max_y;     // frame rectangle end, as byte offsets into out
+	int cur_x, cur_y;     // write cursor, as byte offsets into out
+	int line_size;        // bytes per output row (w * 4)
+	int delay;            // 10 * the 16-bit delay value read from the graphic control extension
+} stbi__gif;

+// Check for the 6-byte signature "GIF87a" or "GIF89a". Reading stops at
+// the first mismatching byte; the stream is NOT rewound here.
+static int stbi__gif_test_raw(stbi__context* s) {
+	static const char magic[4] = {'G', 'I', 'F', '8'};
+	int k, version;
+	for (k = 0; k < 4; ++k) {
+		if (stbi__get8(s) != magic[k])
+			return 0;
+	}
+	version = stbi__get8(s);
+	if (!(version == '9' || version == '7'))
+		return 0;
+	return stbi__get8(s) == 'a' ? 1 : 0;
+}

+// Signature probe that always rewinds the stream before returning.
+static int stbi__gif_test(stbi__context* s) {
+	int is_gif;
+	is_gif = stbi__gif_test_raw(s);
+	stbi__rewind(s);
+	return is_gif;
+}

+// Read num_entries 3-byte palette entries from the stream into pal.
+// The three bytes are stored in reverse order (slots 2, 1, 0); slot 3 is
+// 0 for the entry whose index equals transp and 255 for all others.
+static void stbi__gif_parse_colortable(stbi__context* s, stbi_uc pal[256][4], int num_entries, int transp) {
+	int idx = 0;
+	while (idx < num_entries) {
+		stbi_uc* entry = pal[idx];
+		entry[2] = stbi__get8(s);
+		entry[1] = stbi__get8(s);
+		entry[0] = stbi__get8(s);
+		if (idx == transp)
+			entry[3] = 0;
+		else
+			entry[3] = 255;
+		++idx;
+	}
+}

+// Parse the GIF signature and logical screen descriptor into g.
+//   comp    - optional; set to 4
+//   is_info - when non-zero, stop before reading the global palette
+// Returns 1 on success, or the result of stbi__err for a bad signature.
+static int stbi__gif_header(stbi__context* s, stbi__gif* g, int* comp, int is_info) {
+	static const char sig[4] = {'G', 'I', 'F', '8'};
+	stbi_uc version;
+	int k;
+	for (k = 0; k < 4; ++k) {
+		if (stbi__get8(s) != sig[k])
+			return stbi__err("not GIF", "Corrupt GIF");
+	}
+	version = stbi__get8(s);
+	if (!(version == '7' || version == '9'))
+		return stbi__err("not GIF", "Corrupt GIF");
+	if (stbi__get8(s) != 'a')
+		return stbi__err("not GIF", "Corrupt GIF");
+	stbi__g_failure_reason = "";
+	// logical screen descriptor
+	g->w = stbi__get16le(s);
+	g->h = stbi__get16le(s);
+	g->flags = stbi__get8(s);
+	g->bgindex = stbi__get8(s);
+	g->ratio = stbi__get8(s);
+	g->transparent = -1;
+	// can't actually tell whether it's 3 or 4 until we parse the comments
+	if (comp != 0)
+		*comp = 4;
+	// bit 0x80 of flags: a palette of 2 << (flags & 7) entries follows
+	if (!is_info && (g->flags & 0x80))
+		stbi__gif_parse_colortable(s, g->pal, 2 << (g->flags & 7), -1);
+	return 1;
+}

+// Read only the GIF header and report the image size.
+//   x, y - optional; receive width and height
+//   comp - optional; forwarded to stbi__gif_header
+// Returns 1 on success. Returns 0 (after rewinding the stream) when the
+// header is not a valid GIF header, and the result of stbi__err when the
+// temporary decoder state cannot be allocated.
+static int stbi__gif_info_raw(stbi__context* s, int* x, int* y, int* comp) {
+	// stbi__gif is large (it embeds the LZW table), so it lives on the heap
+	stbi__gif* g = (stbi__gif*)stbi__malloc(sizeof(stbi__gif));
+	if (!g)
+		return stbi__err("outofmem", "Out of memory");
+	if (!stbi__gif_header(s, g, comp, 1)) {
+		STBI_FREE(g);
+		stbi__rewind(s);
+		return 0;
+	}
+	if (x)
+		*x = g->w;
+	if (y)
+		*y = g->h;
+	STBI_FREE(g);
+	return 1;
+}

+// Write the pixel string for one LZW code into g->out at the current
+// cursor, marking each written position in g->history and advancing the
+// cursor (including interlace row stepping).
+static void stbi__out_gif_code(stbi__gif* g, stbi__uint16 code) {
+	stbi_uc *p, *c;
+	int idx;
+	// recurse to decode the prefixes, since the linked-list is backwards,
+	// and working backwards through an interleaved image would be nasty
+	if (g->codes[code].prefix >= 0)
+		stbi__out_gif_code(g, g->codes[code].prefix);
+	// cursor is past the frame rectangle: drop the pixel
+	if (g->cur_y >= g->max_y)
+		return;
+	// cur_x and cur_y are both byte offsets, so their sum addresses the pixel
+	idx = g->cur_x + g->cur_y;
+	p = &g->out[idx];
+	g->history[idx / 4] = 1; // history has one byte per 4-byte pixel
+	c = &g->color_table[g->codes[code].suffix * 4];
+	if (c[3] > 128) { // don't render transparent pixels;
+		// palette slots 2,1,0 go to output bytes 0,1,2
+		p[0] = c[2];
+		p[1] = c[1];
+		p[2] = c[0];
+		p[3] = c[3];
+	}
+	g->cur_x += 4;
+	if (g->cur_x >= g->max_x) {
+		// end of row: return to the left edge and step down
+		g->cur_x = g->start_x;
+		g->cur_y += g->step;
+		// ran off the bottom with passes remaining (parse > 0):
+		// restart from the top with the next, finer row spacing
+		while (g->cur_y >= g->max_y && g->parse > 0) {
+			g->step = (1 << g->parse) * g->line_size;
+			g->cur_y = g->start_y + (g->step >> 1);
+			--g->parse;
+		}
+	}
+}

+// Decode the LZW-compressed raster data of one image descriptor into
+// g->out via stbi__out_gif_code.
+// Returns g->out when the data ends (zero-length sub-block or the
+// end-of-stream code), NULL when the minimum code size exceeds 12, and
+// the result of stbi__errpuc for a corrupt code stream.
+static stbi_uc* stbi__process_gif_raster(stbi__context* s, stbi__gif* g) {
+	stbi_uc lzw_cs;
+	stbi__int32 len, init_code;
+	stbi__uint32 first;
+	stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
+	stbi__gif_lzw* p;
+	lzw_cs = stbi__get8(s); // minimum LZW code size
+	if (lzw_cs > 12)
+		return NULL;
+	clear = 1 << lzw_cs; // clear code; clear + 1 is the end-of-stream code
+	first = 1;
+	codesize = lzw_cs + 1;
+	codemask = (1 << codesize) - 1;
+	bits = 0;
+	valid_bits = 0;
+	// root codes: each one stands for itself and has no prefix
+	for (init_code = 0; init_code < clear; init_code++) {
+		g->codes[init_code].prefix = -1;
+		g->codes[init_code].first = (stbi_uc)init_code;
+		g->codes[init_code].suffix = (stbi_uc)init_code;
+	}
+	// support no starting clear code
+	avail = clear + 2; // next free table slot
+	oldcode = -1;
+	len = 0; // bytes remaining in the current data sub-block
+	for (;;) {
+		if (valid_bits < codesize) {
+			// not enough buffered bits for a code: pull in one more byte
+			if (len == 0) {
+				len = stbi__get8(s); // start new block
+				if (len == 0)
+					return g->out;
+			}
+			--len;
+			bits |= (stbi__int32)stbi__get8(s) << valid_bits;
+			valid_bits += 8;
+		} else {
+			stbi__int32 code = bits & codemask;
+			bits >>= codesize;
+			valid_bits -= codesize;
+			// @OPTIMIZE: is there some way we can accelerate the non-clear
+			// path?
+			if (code == clear) { // clear code
+				codesize = lzw_cs + 1;
+				codemask = (1 << codesize) - 1;
+				avail = clear + 2;
+				oldcode = -1;
+				first = 0;
+			} else if (code == clear + 1) { // end of stream code
+				// skip the rest of this sub-block and all following
+				// sub-blocks up to the zero-length terminator
+				stbi__skip(s, len);
+				while ((len = stbi__get8(s)) > 0)
+					stbi__skip(s, len);
+				return g->out;
+			} else if (code <= avail) {
+				// a data code arrived before any clear code was seen
+				if (first) {
+					return stbi__errpuc("no clear code", "Corrupt GIF");
+				}
+				if (oldcode >= 0) {
+					// add table entry: previous string + first pixel of this one
+					p = &g->codes[avail++];
+					if (avail > 8192) {
+						return stbi__errpuc("too many codes", "Corrupt GIF");
+					}
+					p->prefix = (stbi__int16)oldcode;
+					p->first = g->codes[oldcode].first;
+					// code == avail: the code refers to the entry being created right now
+					p->suffix = (code == avail) ? p->first : g->codes[code].first;
+				} else if (code == avail)
+					return stbi__errpuc("illegal code in raster", "Corrupt GIF");
+				stbi__out_gif_code(g, (stbi__uint16)code);
+				// widen the code size once the table fills the current width
+				if ((avail & codemask) == 0 && avail <= 0x0FFF) {
+					codesize++;
+					codemask = (1 << codesize) - 1;
+				}
+				oldcode = code;
+			} else {
+				return stbi__errpuc("illegal code in raster", "Corrupt GIF");
+			}
+		}
+	}
+}

+// Decode the next frame of a GIF stream into g->out.
+// This function is designed to support animated GIFs: on the first call
+// (g->out == 0) it parses the header and allocates the frame buffers; on
+// later calls it first applies the previous frame's disposal method.
+//   comp     - forwarded to stbi__gif_header on the first call
+//   req_comp - unused
+//   two_back - the image from two frames ago, used only for disposal
+//              method 3; may be 0
+// Returns g->out on success, (stbi_uc*)s when the stream terminator is
+// reached, and 0 / the result of stbi__errpuc on failure.
+static stbi_uc* stbi__gif_load_next(stbi__context* s, stbi__gif* g, int* comp, int req_comp, stbi_uc* two_back) {
+	int dispose;
+	int first_frame;
+	int pi;
+	int pcount;
+	STBI_NOTUSED(req_comp);
+	// on first frame, any non-written pixels get the background colour
+	// (non-transparent)
+	first_frame = 0;
+	if (g->out == 0) {
+		if (!stbi__gif_header(s, g, comp, 0))
+			return 0; // stbi__g_failure_reason set by stbi__gif_header
+		if (!stbi__mad3sizes_valid(4, g->w, g->h, 0))
+			return stbi__errpuc("too large", "GIF image is too large");
+		pcount = g->w * g->h;
+		g->out = (stbi_uc*)stbi__malloc(4 * pcount);
+		g->background = (stbi_uc*)stbi__malloc(4 * pcount);
+		g->history = (stbi_uc*)stbi__malloc(pcount);
+		// buffers that were allocated stay in g; nothing is freed here
+		if (!g->out || !g->background || !g->history)
+			return stbi__errpuc("outofmem", "Out of memory");
+		// image is treated as "transparent" at the start - ie, nothing
+		// overwrites the current background; background colour is only used for
+		// pixels that are not rendered first frame, after that "background"
+		// color refers to the color that was there the previous frame.
+		memset(g->out, 0x00, 4 * pcount);
+		memset(g->background, 0x00,
+			   4 * pcount); // state of the background (starts transparent)
+		memset(g->history, 0x00,
+			   pcount); // pixels that were affected previous frame
+		first_frame = 1;
+	} else {
+		// second or later frame - how do we dispose of the previous one?
+		dispose = (g->eflags & 0x1C) >> 2;
+		pcount = g->w * g->h;
+		if ((dispose == 3) && (two_back == 0)) {
+			dispose = 2; // if I don't have an image to revert back to, default
+						 // to the old background
+		}
+		if (dispose == 3) { // use previous graphic
+			for (pi = 0; pi < pcount; ++pi) {
+				if (g->history[pi]) {
+					memcpy(&g->out[pi * 4], &two_back[pi * 4], 4);
+				}
+			}
+		} else if (dispose == 2) {
+			// restore what was changed last frame to background before that
+			// frame;
+			for (pi = 0; pi < pcount; ++pi) {
+				if (g->history[pi]) {
+					memcpy(&g->out[pi * 4], &g->background[pi * 4], 4);
+				}
+			}
+		} else {
+			// This is a non-disposal case either way, so just
+			// leave the pixels as is, and they will become the new background
+			// 1: do not dispose
+			// 0:  not specified.
+		}
+		// background is what out is after the undoing of the previous frame;
+		memcpy(g->background, g->out, 4 * g->w * g->h);
+	}
+	// clear my history;
+	memset(g->history, 0x00,
+		   g->w * g->h); // pixels that were affected previous frame
+	// read blocks until an image descriptor has been decoded or the stream ends
+	for (;;) {
+		int tag = stbi__get8(s);
+		switch (tag) {
+		case 0x2C: /* Image Descriptor */
+		{
+			stbi__int32 x, y, w, h;
+			stbi_uc* o;
+			x = stbi__get16le(s);
+			y = stbi__get16le(s);
+			w = stbi__get16le(s);
+			h = stbi__get16le(s);
+			// the frame rectangle must lie inside the logical screen
+			if (((x + w) > (g->w)) || ((y + h) > (g->h)))
+				return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
+			// all cursor fields below are byte offsets into g->out
+			g->line_size = g->w * 4;
+			g->start_x = x * 4;
+			g->start_y = y * g->line_size;
+			g->max_x = g->start_x + w * 4;
+			g->max_y = g->start_y + h * g->line_size;
+			g->cur_x = g->start_x;
+			g->cur_y = g->start_y;
+			// if the width of the specified rectangle is 0, that means
+			// we may not see *any* pixels or the image is malformed;
+			// to make sure this is caught, move the current y down to
+			// max_y (which is what out_gif_code checks).
+			if (w == 0)
+				g->cur_y = g->max_y;
+			g->lflags = stbi__get8(s);
+			if (g->lflags & 0x40) {
+				g->step = 8 * g->line_size; // first interlaced spacing
+				g->parse = 3;
+			} else {
+				g->step = g->line_size;
+				g->parse = 0;
+			}
+			// palette choice: one read here (lflags & 0x80), else the one from the header
+			if (g->lflags & 0x80) {
+				stbi__gif_parse_colortable(s, g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
+				g->color_table = (stbi_uc*)g->lpal;
+			} else if (g->flags & 0x80) {
+				g->color_table = (stbi_uc*)g->pal;
+			} else
+				return stbi__errpuc("missing color table", "Corrupt GIF");
+			o = stbi__process_gif_raster(s, g);
+			if (!o)
+				return NULL;
+			// if this was the first frame,
+			pcount = g->w * g->h;
+			if (first_frame && (g->bgindex > 0)) {
+				// if first frame, any pixel not drawn to gets the background
+				// color
+				for (pi = 0; pi < pcount; ++pi) {
+					if (g->history[pi] == 0) {
+						g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo
+													 // that; It will be reset next frame if need
+													 // be;
+						memcpy(&g->out[pi * 4], &g->pal[g->bgindex], 4);
+					}
+				}
+			}
+			return o;
+		}
+		case 0x21: // Extension block; only the Graphic Control Extension is interpreted.
+		{
+			int len;
+			int ext = stbi__get8(s);
+			if (ext == 0xF9) { // Graphic Control Extension.
+				len = stbi__get8(s);
+				if (len == 4) {
+					g->eflags = stbi__get8(s);
+					g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second,
+													  // saving as 1/1000ths.
+					// unset old transparent
+					if (g->transparent >= 0) {
+						g->pal[g->transparent][3] = 255;
+					}
+					if (g->eflags & 0x01) {
+						g->transparent = stbi__get8(s);
+						if (g->transparent >= 0) {
+							g->pal[g->transparent][3] = 0;
+						}
+					} else {
+						// don't need transparent
+						stbi__skip(s, 1);
+						g->transparent = -1;
+					}
+				} else {
+					// unexpected block size: skip it and go read the next tag
+					stbi__skip(s, len);
+					break;
+				}
+			}
+			// skip the remaining sub-blocks up to the zero-length terminator
+			while ((len = stbi__get8(s)) != 0) {
+				stbi__skip(s, len);
+			}
+			break;
+		}
+		case 0x3B:				// gif stream termination code
+			return (stbi_uc*)s; // using '1' causes warning on some compilers
+		default:
+			return stbi__errpuc("unknown code", "Corrupt GIF");
+		}
+	}
+}

+// Decode every frame of a GIF into one contiguous buffer.
+//   delays   - optional; *delays receives a heap array with one delay
+//              value per frame (the g.delay recorded for that frame)
+//   x, y     - receive the frame width and height
+//   z        - receives the number of frames decoded
+//   comp     - forwarded to stbi__gif_load_next
+//   req_comp - requested components per pixel; 0 or 4 keeps RGBA
+// Returns the frames stored back to back (frame i starts at byte
+// i * w * h * 4 before any format conversion), NULL when no frame could
+// be decoded, or the result of stbi__errpuc when the stream is not a GIF
+// or memory runs out. On out-of-memory all partial results are released
+// and *delays is reset to 0.
+static void* stbi__load_gif_main(stbi__context* s, int** delays, int* x, int* y, int* z, int* comp, int req_comp) {
+	int layers = 0;
+	int stride = 0;
+	stbi_uc* u = 0;
+	stbi_uc* out = 0;
+	stbi_uc* two_back = 0;
+	stbi__gif g;
+	if (!stbi__gif_test(s))
+		return stbi__errpuc("not GIF", "Image was not as a gif type.");
+	memset(&g, 0, sizeof(g));
+	if (delays) {
+		*delays = 0;
+	}
+	do {
+		u = stbi__gif_load_next(s, &g, comp, req_comp, two_back);
+		if (u == (stbi_uc*)s)
+			u = 0; // end of animated gif marker
+		if (u) {
+			void* grown;
+			*x = g.w;
+			*y = g.h;
+			++layers;
+			stride = g.w * g.h * 4;
+			// grow the frame store by one frame; keep the old pointer until
+			// the new one is known to be valid
+			if (out)
+				grown = STBI_REALLOC(out, layers * stride);
+			else
+				grown = stbi__malloc(layers * stride);
+			if (grown == NULL)
+				goto out_of_memory;
+			out = (stbi_uc*)grown;
+			if (delays) {
+				if (*delays)
+					grown = STBI_REALLOC(*delays, sizeof(int) * layers);
+				else
+					grown = stbi__malloc(sizeof(int) * layers);
+				if (grown == NULL)
+					goto out_of_memory;
+				*delays = (int*)grown;
+			}
+			memcpy(out + ((layers - 1) * stride), u, stride);
+			// The frame stored just before this one is what a later
+			// "restore to previous" disposal must revert to. It has to be
+			// recomputed on every pass because the realloc may move `out`.
+			if (layers >= 2) {
+				two_back = out + (layers - 2) * stride;
+			}
+			if (delays) {
+				(*delays)[layers - 1U] = g.delay;
+			}
+		}
+	} while (u != 0);
+	// free temp buffer;
+	STBI_FREE(g.out);
+	STBI_FREE(g.history);
+	STBI_FREE(g.background);
+	// do the final conversion after loading everything;
+	// (skipped when no frame was decoded, so no NULL buffer is converted)
+	if (out && req_comp && req_comp != 4)
+		out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h);
+	*z = layers;
+	return out;
+
+out_of_memory:
+	STBI_FREE(g.out);
+	STBI_FREE(g.history);
+	STBI_FREE(g.background);
+	STBI_FREE(out);
+	if (delays) {
+		STBI_FREE(*delays);
+		*delays = 0;
+	}
+	return stbi__errpuc("outofmem", "Out of memory");
+}

+// Decode only the first frame of a GIF.
+//   x, y     - receive width and height on success
+//   comp     - forwarded to stbi__gif_load_next
+//   req_comp - requested components per pixel; 0 or 4 keeps RGBA
+//   ri       - unused
+// Returns the pixel buffer, or 0 when no frame could be decoded.
+static void* stbi__gif_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri) {
+	stbi__gif state;
+	stbi_uc* pixels;
+	STBI_NOTUSED(ri);
+	memset(&state, 0, sizeof(state));
+	pixels = stbi__gif_load_next(s, &state, comp, req_comp, 0);
+	// the stream pointer itself is the "end of animated gif" marker
+	if (pixels == (stbi_uc*)s)
+		pixels = 0;
+	if (!pixels) {
+		// error or no frame: release the image buffer if one was allocated
+		if (state.out)
+			STBI_FREE(state.out);
+	} else {
+		*x = state.w;
+		*y = state.h;
+		// conversion happens after a successful load so that the same
+		// can be done for multiple frames
+		if (req_comp && req_comp != 4)
+			pixels = stbi__convert_format(pixels, 4, req_comp, state.w, state.h);
+	}
+	// buffers only needed for multiple-frame loading
+	STBI_FREE(state.history);
+	STBI_FREE(state.background);
+	return pixels;
+}

+// Thin wrapper around stbi__gif_info_raw.
+static int stbi__gif_info(stbi__context* s, int* x, int* y, int* comp) {
+	int ok = stbi__gif_info_raw(s, x, y, comp);
+	return ok;
+}

+#endif

+// *************************************************************************************************

+// Radiance RGBE HDR loader

+// originally by Nicolas Schulz

+#ifndef STBI_NO_HDR

+// Compare the next bytes of the stream against signature.
+// Returns 0 at the first mismatch (without rewinding); on a full match
+// the stream is rewound and 1 is returned.
+static int stbi__hdr_test_core(stbi__context* s, const char* signature) {
+	const char* expect = signature;
+	while (*expect) {
+		if (stbi__get8(s) != *expect)
+			return 0;
+		++expect;
+	}
+	stbi__rewind(s);
+	return 1;
+}

+// Report whether the stream starts with "#?RADIANCE\n" or "#?RGBE\n".
+// The stream is rewound after each probe.
+static int stbi__hdr_test(stbi__context* s) {
+	int matched = stbi__hdr_test_core(s, "#?RADIANCE\n");
+	stbi__rewind(s);
+	if (matched)
+		return matched;
+	matched = stbi__hdr_test_core(s, "#?RGBE\n");
+	stbi__rewind(s);
+	return matched;
+}

+#define STBI__HDR_BUFLEN 1024

+// Read one line (up to, and not including, '\n') into buffer and
+// NUL-terminate it. At most STBI__HDR_BUFLEN - 1 characters are stored;
+// the remainder of an over-long line is consumed and discarded.
+// Returns buffer.
+static char* stbi__hdr_gettoken(stbi__context* z, char* buffer) {
+	int n = 0;
+	char ch = (char)stbi__get8(z);
+	for (;;) {
+		if (stbi__at_eof(z) || ch == '\n')
+			break;
+		buffer[n] = ch;
+		++n;
+		if (n == STBI__HDR_BUFLEN - 1) {
+			// flush to end of line
+			while (!stbi__at_eof(z)) {
+				if (stbi__get8(z) == '\n')
+					break;
+			}
+			break;
+		}
+		ch = (char)stbi__get8(z);
+	}
+	buffer[n] = 0;
+	return buffer;
+}

+// Convert one 4-byte RGBE pixel (input) to req_comp floats (output).
+// input[3] is the shared exponent; a zero exponent yields zero colour.
+// For req_comp <= 2 the three mantissas are averaged into output[0];
+// the extra channel for req_comp 2 and 4 is set to 1.
+static void stbi__hdr_convert(float* output, stbi_uc* input, int req_comp) {
+	float scale;
+	if (input[3] == 0) {
+		if (req_comp == 4 || req_comp == 3) {
+			if (req_comp == 4)
+				output[3] = 1;
+			output[2] = 0;
+			output[1] = 0;
+			output[0] = 0;
+		} else if (req_comp == 2 || req_comp == 1) {
+			if (req_comp == 2)
+				output[1] = 1;
+			output[0] = 0;
+		}
+		return;
+	}
+	// Exponent
+	scale = (float)ldexp(1.0f, input[3] - (int)(128 + 8));
+	if (req_comp <= 2) {
+		output[0] = (input[0] + input[1] + input[2]) * scale / 3;
+		if (req_comp == 2)
+			output[1] = 1;
+	} else {
+		output[0] = input[0] * scale;
+		output[1] = input[1] * scale;
+		output[2] = input[2] * scale;
+		if (req_comp == 4)
+			output[3] = 1;
+	}
+}

+// Decode a Radiance RGBE (.hdr) image into float pixels.
+//   s        - input stream positioned at the start of the file
+//   x, y     - receive the width and height parsed from the "-Y h +X w" line
+//   comp     - optional; receives 3
+//   req_comp - requested floats per pixel; 0 means 3
+//   ri       - unused
+// Returns a heap buffer of width * height * req_comp floats, or the
+// result of stbi__errpf on a malformed/unsupported file or on
+// out-of-memory.
+static float* stbi__hdr_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri) {
+	char buffer[STBI__HDR_BUFLEN];
+	char* token;
+	int valid = 0;
+	int width, height;
+	stbi_uc* scanline;
+	float* hdr_data;
+	int len;
+	unsigned char count, value;
+	int i, j, k, c1, c2, z;
+	const char* headerToken;
+	STBI_NOTUSED(ri);
+	// Check identifier
+	headerToken = stbi__hdr_gettoken(s, buffer);
+	if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
+		return stbi__errpf("not HDR", "Corrupt HDR image");
+	// Parse header: lines up to the first empty one; only FORMAT matters
+	for (;;) {
+		token = stbi__hdr_gettoken(s, buffer);
+		if (token[0] == 0)
+			break;
+		if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0)
+			valid = 1;
+	}
+	if (!valid)
+		return stbi__errpf("unsupported format", "Unsupported HDR format");
+	// Parse width and height
+	// can't use sscanf() if we're not using stdio!
+	token = stbi__hdr_gettoken(s, buffer);
+	if (strncmp(token, "-Y ", 3))
+		return stbi__errpf("unsupported data layout", "Unsupported HDR format");
+	token += 3;
+	height = (int)strtol(token, &token, 10);
+	while (*token == ' ')
+		++token;
+	if (strncmp(token, "+X ", 3))
+		return stbi__errpf("unsupported data layout", "Unsupported HDR format");
+	token += 3;
+	width = (int)strtol(token, NULL, 10);
+	*x = width;
+	*y = height;
+	if (comp)
+		*comp = 3;
+	if (req_comp == 0)
+		req_comp = 3;
+	if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
+		return stbi__errpf("too large", "HDR image is too large");
+	// Read data
+	hdr_data = (float*)stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
+	if (!hdr_data)
+		return stbi__errpf("outofmem", "Out of memory");
+	// Load image data
+	// image data is stored as some number of scanlines
+	if (width < 8 || width >= 32768) {
+		// Read flat data
+		for (j = 0; j < height; ++j) {
+			for (i = 0; i < width; ++i) {
+				stbi_uc rgbe[4];
+			main_decode_loop:
+				stbi__getn(s, rgbe, 4);
+				stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
+			}
+		}
+	} else {
+		// Read RLE-encoded data
+		scanline = NULL;
+		for (j = 0; j < height; ++j) {
+			c1 = stbi__get8(s);
+			c2 = stbi__get8(s);
+			len = stbi__get8(s);
+			if (c1 != 2 || c2 != 2 || (len & 0x80)) {
+				// not run-length encoded, so we have to actually use THIS data
+				// as a decoded pixel (note this can't be a valid pixel--one of
+				// RGB must be
+				// >= 128)
+				stbi_uc rgbe[4];
+				rgbe[0] = (stbi_uc)c1;
+				rgbe[1] = (stbi_uc)c2;
+				rgbe[2] = (stbi_uc)len;
+				rgbe[3] = (stbi_uc)stbi__get8(s);
+				stbi__hdr_convert(hdr_data, rgbe, req_comp);
+				// pixel 0 is done; continue in the flat loop at pixel 1 of row 0
+				i = 1;
+				j = 0;
+				STBI_FREE(scanline);
+				goto main_decode_loop; // yes, this makes no sense
+			}
+			// the scanline header carries the row length as a 16-bit big-endian value
+			len <<= 8;
+			len |= stbi__get8(s);
+			if (len != width) {
+				STBI_FREE(hdr_data);
+				STBI_FREE(scanline);
+				return stbi__errpf("invalid decoded scanline length", "corrupt HDR");
+			}
+			if (scanline == NULL) {
+				scanline = (stbi_uc*)stbi__malloc_mad2(width, 4, 0);
+				if (!scanline) {
+					STBI_FREE(hdr_data);
+					return stbi__errpf("outofmem", "Out of memory");
+				}
+			}
+			// the four channels are stored one after another, each RLE-coded
+			for (k = 0; k < 4; ++k) {
+				int nleft;
+				i = 0;
+				while ((nleft = width - i) > 0) {
+					count = stbi__get8(s);
+					if (count > 128) {
+						// Run
+						value = stbi__get8(s);
+						count -= 128;
+						if (count > nleft) {
+							STBI_FREE(hdr_data);
+							STBI_FREE(scanline);
+							return stbi__errpf("corrupt", "bad RLE data in HDR");
+						}
+						for (z = 0; z < count; ++z)
+							scanline[i++ * 4 + k] = value;
+					} else {
+						// Dump
+						// A zero-length dump makes no progress; since reads
+						// past the end of input yield 0, accepting it would
+						// loop forever on a truncated file.
+						if (count == 0 || count > nleft) {
+							STBI_FREE(hdr_data);
+							STBI_FREE(scanline);
+							return stbi__errpf("corrupt", "bad RLE data in HDR");
+						}
+						for (z = 0; z < count; ++z)
+							scanline[i++ * 4 + k] = stbi__get8(s);
+					}
+				}
+			}
+			for (i = 0; i < width; ++i)
+				stbi__hdr_convert(hdr_data + (j * width + i) * req_comp, scanline + i * 4, req_comp);
+		}
+		if (scanline)
+			STBI_FREE(scanline);
+	}
+	return hdr_data;
+}

+// Read the size of a Radiance HDR image from its header.
+//   x, y, comp - optional outputs; comp receives 3
+// Returns 1 on success. On any failure the stream is rewound and 0 is
+// returned.
+static int stbi__hdr_info(stbi__context* s, int* x, int* y, int* comp) {
+	char line[STBI__HDR_BUFLEN];
+	char* cursor;
+	int have_format = 0;
+	int sink;
+	if (!x)
+		x = &sink;
+	if (!y)
+		y = &sink;
+	if (!comp)
+		comp = &sink;
+	if (stbi__hdr_test(s) == 0)
+		goto not_hdr;
+	// header lines run up to the first empty line
+	for (;;) {
+		cursor = stbi__hdr_gettoken(s, line);
+		if (cursor[0] == 0)
+			break;
+		if (strcmp(cursor, "FORMAT=32-bit_rle_rgbe") == 0)
+			have_format = 1;
+	}
+	if (!have_format)
+		goto not_hdr;
+	// resolution line: "-Y <height> +X <width>"
+	cursor = stbi__hdr_gettoken(s, line);
+	if (strncmp(cursor, "-Y ", 3))
+		goto not_hdr;
+	cursor += 3;
+	*y = (int)strtol(cursor, &cursor, 10);
+	while (*cursor == ' ')
+		++cursor;
+	if (strncmp(cursor, "+X ", 3))
+		goto not_hdr;
+	cursor += 3;
+	*x = (int)strtol(cursor, NULL, 10);
+	*comp = 3;
+	return 1;
+
+not_hdr:
+	stbi__rewind(s);
+	return 0;
+}

+#endif // STBI_NO_HDR

+#ifndef STBI_NO_BMP

+// Report size and component count of a BMP using stbi__bmp_parse_header.
+// The stream is rewound after parsing. Returns 1 on success, 0 when the
+// header could not be parsed.
+static int stbi__bmp_info(stbi__context* s, int* x, int* y, int* comp) {
+	stbi__bmp_data hdr;
+	void* parsed;
+	hdr.all_a = 255;
+	parsed = stbi__bmp_parse_header(s, &hdr);
+	stbi__rewind(s);
+	if (parsed == NULL)
+		return 0;
+	if (x)
+		*x = s->img_x;
+	if (y)
+		*y = s->img_y;
+	if (comp) {
+		// 24 bpp with mask 0xff000000 is reported as 3 components;
+		// otherwise any non-zero hdr.ma means 4
+		int three = (hdr.bpp == 24 && hdr.ma == 0xff000000) || !hdr.ma;
+		*comp = three ? 3 : 4;
+	}
+	return 1;
+}

+#endif

+#ifndef STBI_NO_PSD

+// Read size information from a PSD header.
+//   x, y, comp - optional outputs; comp receives 4
+// Returns 1 on success. On any rejected field the stream is rewound and
+// 0 is returned (*y and *x may already have been written by then).
+static int stbi__psd_info(stbi__context* s, int* x, int* y, int* comp) {
+	int channels, bit_depth, sink;
+	if (!x)
+		x = &sink;
+	if (!y)
+		y = &sink;
+	if (!comp)
+		comp = &sink;
+	// 0x38425053 is the ASCII signature "8BPS"
+	if (stbi__get32be(s) != 0x38425053)
+		goto reject;
+	if (stbi__get16be(s) != 1)
+		goto reject;
+	stbi__skip(s, 6);
+	channels = stbi__get16be(s);
+	if (channels < 0 || channels > 16)
+		goto reject;
+	*y = stbi__get32be(s);
+	*x = stbi__get32be(s);
+	bit_depth = stbi__get16be(s);
+	if (!(bit_depth == 8 || bit_depth == 16))
+		goto reject;
+	if (stbi__get16be(s) != 3)
+		goto reject;
+	*comp = 4;
+	return 1;
+
+reject:
+	stbi__rewind(s);
+	return 0;
+}

+// Report whether the stream is a PSD whose depth field is 16.
+// Returns 1 if so (stream left after the depth field); otherwise the
+// stream is rewound and 0 is returned.
+static int stbi__psd_is16(stbi__context* s) {
+	int channels, bit_depth;
+	// 0x38425053 is the ASCII signature "8BPS"
+	if (stbi__get32be(s) != 0x38425053)
+		goto reject;
+	if (stbi__get16be(s) != 1)
+		goto reject;
+	stbi__skip(s, 6);
+	channels = stbi__get16be(s);
+	if (channels < 0 || channels > 16)
+		goto reject;
+	// two 32-bit fields precede the depth; their values are not needed
+	(void)stbi__get32be(s);
+	(void)stbi__get32be(s);
+	bit_depth = stbi__get16be(s);
+	if (bit_depth != 16)
+		goto reject;
+	return 1;
+
+reject:
+	stbi__rewind(s);
+	return 0;
+}

+#endif

+#ifndef STBI_NO_PIC

+// Read size and component count from a Softimage PIC header.
+//   x, y, comp - optional outputs; comp receives 4 when any packet's
+//                channel mask has bit 0x10 set, else 3
+// Returns 1 on success. On every failure the stream is rewound and 0 is
+// returned, so the next format probe starts from the beginning.
+static int stbi__pic_info(stbi__context* s, int* x, int* y, int* comp) {
+	int act_comp = 0, num_packets = 0, chained, dummy;
+	stbi__pic_packet packets[10];
+	if (!x)
+		x = &dummy;
+	if (!y)
+		y = &dummy;
+	if (!comp)
+		comp = &dummy;
+	if (!stbi__pic_is4(s, "\x53\x80\xF6\x34")) {
+		stbi__rewind(s);
+		return 0;
+	}
+	stbi__skip(s, 88);
+	*x = stbi__get16be(s);
+	*y = stbi__get16be(s);
+	if (stbi__at_eof(s)) {
+		stbi__rewind(s);
+		return 0;
+	}
+	// reject sizes whose pixel count would exceed 1 << 28
+	if ((*x) != 0 && (1 << 28) / (*x) < (*y)) {
+		stbi__rewind(s);
+		return 0;
+	}
+	stbi__skip(s, 8);
+	// walk the chained packet descriptors, OR-ing their channel masks
+	do {
+		stbi__pic_packet* packet;
+		if (num_packets == sizeof(packets) / sizeof(packets[0])) {
+			// too many packets: rewind like every other failure path
+			stbi__rewind(s);
+			return 0;
+		}
+		packet = &packets[num_packets++];
+		chained = stbi__get8(s);
+		packet->size = stbi__get8(s);
+		packet->type = stbi__get8(s);
+		packet->channel = stbi__get8(s);
+		act_comp |= packet->channel;
+		if (stbi__at_eof(s)) {
+			stbi__rewind(s);
+			return 0;
+		}
+		if (packet->size != 8) {
+			stbi__rewind(s);
+			return 0;
+		}
+	} while (chained);
+	*comp = (act_comp & 0x10 ? 4 : 3);
+	return 1;
+}

+#endif

+// *************************************************************************************************

+// Portable Gray Map and Portable Pixel Map loader

+// by Ken Miller

+//

+// PGM: http://netpbm.sourceforge.net/doc/pgm.html

+// PPM: http://netpbm.sourceforge.net/doc/ppm.html

+//

+// Known limitations:

+//    Supports '#' comments only between header fields (where whitespace is skipped)

+//    Does not support ASCII image data (formats P2 and P3)

+//    Does not support 16-bit-per-channel

+#ifndef STBI_NO_PNM

+// Check for the two-byte magic "P5" or "P6".
+// Returns 1 on a match (stream left after the magic); otherwise the
+// stream is rewound and 0 is returned.
+static int stbi__pnm_test(stbi__context* s) {
+	char magic = (char)stbi__get8(s);
+	char kind = (char)stbi__get8(s);
+	if (magic == 'P' && (kind == '5' || kind == '6'))
+		return 1;
+	stbi__rewind(s);
+	return 0;
+}

+// Decode a binary PGM/PPM image.
+//   x, y     - receive width and height
+//   comp     - optional; receives the file's component count
+//   req_comp - requested components per pixel; 0 keeps the file's count
+//   ri       - unused
+// Returns the pixel buffer, 0 when the header is rejected by
+// stbi__pnm_info, or the result of stbi__errpuc when the image is too
+// large, memory runs out, or the pixel data is shorter than the header
+// promises.
+static void* stbi__pnm_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri) {
+	stbi_uc* out;
+	STBI_NOTUSED(ri);
+	if (!stbi__pnm_info(s, (int*)&s->img_x, (int*)&s->img_y, (int*)&s->img_n))
+		return 0;
+	*x = s->img_x;
+	*y = s->img_y;
+	if (comp)
+		*comp = s->img_n;
+	if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0))
+		return stbi__errpuc("too large", "PNM too large");
+	out = (stbi_uc*)stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0);
+	if (!out)
+		return stbi__errpuc("outofmem", "Out of memory");
+	// A short read would leave the tail of `out` uninitialised; treat it
+	// as an error instead of returning heap garbage as pixels.
+	if (!stbi__getn(s, out, s->img_n * s->img_x * s->img_y)) {
+		STBI_FREE(out);
+		return stbi__errpuc("bad PNM", "PNM file truncated");
+	}
+	if (req_comp && req_comp != s->img_n) {
+		// stbi__convert_format frees input on failure
+		out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
+	}
+	return out;
+}

+// Non-zero for space, tab, newline, vertical tab, form feed and carriage return.
+static int stbi__pnm_isspace(char c) {
+	switch (c) {
+	case ' ':
+	case '\t':
+	case '\n':
+	case '\v':
+	case '\f':
+	case '\r':
+		return 1;
+	default:
+		return 0;
+	}
+}

+// Advance past whitespace and '#' comments.
+// *c holds the current look-ahead character and is updated in place; a
+// comment runs from '#' up to a '\n' or '\r'.
+static void stbi__pnm_skip_whitespace(stbi__context* s, char* c) {
+	int in_comment;
+	do {
+		while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
+			*c = (char)stbi__get8(s);
+		in_comment = !stbi__at_eof(s) && *c == '#';
+		if (in_comment) {
+			while (!stbi__at_eof(s) && *c != '\n' && *c != '\r')
+				*c = (char)stbi__get8(s);
+		}
+	} while (in_comment);
+}

+// Non-zero when c is an ASCII decimal digit.
+static int stbi__pnm_isdigit(char c) {
+	if (c < '0')
+		return 0;
+	return c <= '9';
+}

+// Parse an unsigned decimal number starting at the look-ahead character
+// *c. On return *c holds the first character after the digits.
+// Returns 0 when no digit is present. No overflow check is performed.
+static int stbi__pnm_getinteger(stbi__context* s, char* c) {
+	int total;
+	for (total = 0; !stbi__at_eof(s) && stbi__pnm_isdigit(*c); *c = (char)stbi__get8(s))
+		total = total * 10 + (*c - '0');
+	return total;
+}

+// Parse a binary PGM ("P5") or PPM ("P6") header.
+//   x, y, comp - optional outputs; comp receives 1 for P5 and 3 for P6
+// The stream is rewound before parsing. Returns 1 on success, 0 (after
+// rewinding) when the magic does not match, and the result of stbi__err
+// when the maximum sample value exceeds 255.
+static int stbi__pnm_info(stbi__context* s, int* x, int* y, int* comp) {
+	int max_value, sink;
+	char look, magic, kind;
+	if (!x)
+		x = &sink;
+	if (!y)
+		y = &sink;
+	if (!comp)
+		comp = &sink;
+	stbi__rewind(s);
+	// Get identifier
+	magic = (char)stbi__get8(s);
+	kind = (char)stbi__get8(s);
+	if (!(magic == 'P' && (kind == '5' || kind == '6'))) {
+		stbi__rewind(s);
+		return 0;
+	}
+	// '5' is 1-component .pgm; '6' is 3-component .ppm
+	if (kind == '6')
+		*comp = 3;
+	else
+		*comp = 1;
+	look = (char)stbi__get8(s);
+	stbi__pnm_skip_whitespace(s, &look);
+	*x = stbi__pnm_getinteger(s, &look); // read width
+	stbi__pnm_skip_whitespace(s, &look);
+	*y = stbi__pnm_getinteger(s, &look); // read height
+	stbi__pnm_skip_whitespace(s, &look);
+	max_value = stbi__pnm_getinteger(s, &look); // read max value
+	if (max_value <= 255)
+		return 1;
+	return stbi__err("max value > 255", "PPM image not 8-bit");
+}

+#endif

+// Try each compiled-in format's info routine in turn and return 1 as
+// soon as one recognises the stream; otherwise report an unknown type
+// through stbi__err.
+static int stbi__info_main(stbi__context* s, int* x, int* y, int* comp) {
+#ifndef STBI_NO_JPEG
+	if (stbi__jpeg_info(s, x, y, comp)) { return 1; }
+#endif
+#ifndef STBI_NO_PNG
+	if (stbi__png_info(s, x, y, comp)) { return 1; }
+#endif
+#ifndef STBI_NO_GIF
+	if (stbi__gif_info(s, x, y, comp)) { return 1; }
+#endif
+#ifndef STBI_NO_BMP
+	if (stbi__bmp_info(s, x, y, comp)) { return 1; }
+#endif
+#ifndef STBI_NO_PSD
+	if (stbi__psd_info(s, x, y, comp)) { return 1; }
+#endif
+#ifndef STBI_NO_PIC
+	if (stbi__pic_info(s, x, y, comp)) { return 1; }
+#endif
+#ifndef STBI_NO_PNM
+	if (stbi__pnm_info(s, x, y, comp)) { return 1; }
+#endif
+#ifndef STBI_NO_HDR
+	if (stbi__hdr_info(s, x, y, comp)) { return 1; }
+#endif
+// test tga last because it's a crappy test!
+#ifndef STBI_NO_TGA
+	if (stbi__tga_info(s, x, y, comp)) { return 1; }
+#endif
+	return stbi__err("unknown image type", "Image not of any known type, or corrupt");
+}

+// Return 1 when a compiled-in PNG or PSD probe reports 16-bit data,
+// else 0.
+static int stbi__is_16_main(stbi__context* s) {
+#ifndef STBI_NO_PNG
+	if (stbi__png_is16(s)) { return 1; }
+#endif
+#ifndef STBI_NO_PSD
+	if (stbi__psd_is16(s)) { return 1; }
+#endif
+	return 0;
+}

+#ifndef STBI_NO_STDIO

+// Open filename and query image size/components without decoding.
+// Returns the result of stbi_info_from_file, or the result of stbi__err
+// when the file cannot be opened.
+STBIDEF int stbi_info(char const* filename, int* x, int* y, int* comp) {
+	int ok;
+	FILE* fp = stbi__fopen(filename, "rb");
+	if (fp == NULL)
+		return stbi__err("can't fopen", "Unable to open file");
+	ok = stbi_info_from_file(fp, x, y, comp);
+	fclose(fp);
+	return ok;
+}

+// Query image size/components from an open FILE. The file position
+// recorded on entry is restored with fseek before returning.
+STBIDEF int stbi_info_from_file(FILE* f, int* x, int* y, int* comp) {
+	stbi__context ctx;
+	int found;
+	long start = ftell(f);
+	stbi__start_file(&ctx, f);
+	found = stbi__info_main(&ctx, x, y, comp);
+	fseek(f, start, SEEK_SET);
+	return found;
+}

+// Open filename and report whether it holds 16-bit image data.
+// Returns the result of stbi_is_16_bit_from_file, or the result of
+// stbi__err when the file cannot be opened.
+STBIDEF int stbi_is_16_bit(char const* filename) {
+	int is16;
+	FILE* fp = stbi__fopen(filename, "rb");
+	if (fp == NULL)
+		return stbi__err("can't fopen", "Unable to open file");
+	is16 = stbi_is_16_bit_from_file(fp);
+	fclose(fp);
+	return is16;
+}

+// 16-bit probe on an open FILE. The file position recorded on entry is
+// restored with fseek before returning.
+STBIDEF int stbi_is_16_bit_from_file(FILE* f) {
+	stbi__context ctx;
+	int is16;
+	long start = ftell(f);
+	stbi__start_file(&ctx, f);
+	is16 = stbi__is_16_main(&ctx);
+	fseek(f, start, SEEK_SET);
+	return is16;
+}

+#endif // !STBI_NO_STDIO

+// Query image size/components from an in-memory buffer of len bytes.
+STBIDEF int stbi_info_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* comp) {
+	stbi__context ctx;
+	int found;
+	stbi__start_mem(&ctx, buffer, len);
+	found = stbi__info_main(&ctx, x, y, comp);
+	return found;
+}

+// Query image size/components from a stream driven by user I/O callbacks.
+STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const* c, void* user, int* x, int* y, int* comp) {
+	stbi__context ctx;
+	int found;
+	stbi__start_callbacks(&ctx, (stbi_io_callbacks*)c, user);
+	found = stbi__info_main(&ctx, x, y, comp);
+	return found;
+}

+// 16-bit probe on an in-memory buffer of len bytes.
+STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const* buffer, int len) {
+	stbi__context ctx;
+	int is16;
+	stbi__start_mem(&ctx, buffer, len);
+	is16 = stbi__is_16_main(&ctx);
+	return is16;
+}

+// 16-bit probe on a stream driven by user I/O callbacks.
+STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const* c, void* user) {
+	stbi__context ctx;
+	int is16;
+	stbi__start_callbacks(&ctx, (stbi_io_callbacks*)c, user);
+	is16 = stbi__is_16_main(&ctx);
+	return is16;
+}

+#endif // STB_IMAGE_IMPLEMENTATION

+/*

+   revision history:

+	  2.20  (2019-02-07) support utf8 filenames in Windows; fix warnings and

+   platform ifdefs 2.19  (2018-02-11) fix warning 2.18  (2018-01-30) fix

+   warnings 2.17  (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug

+						 1-bit BMP

+						 *_is_16_bit api

+						 avoid warnings

+	  2.16  (2017-07-23) all functions have 16-bit variants;

+						 STBI_NO_STDIO works again;

+						 compilation fixes;

+						 fix rounding in unpremultiply;

+						 optimize vertical flip;

+						 disable raw_len validation;

+						 documentation fixes

+	  2.15  (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;

+						 warning fixes; disable run-time SSE detection on gcc;

+						 uniform handling of optional "return" values;

+						 thread-safe initialization of zlib tables

+	  2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet

+   JPGs 2.13  (2016-11-29) add 16-bit API, only supported for PNG right now 2.12

+   (2016-04-02) fix typo in 2.11 PSD fix that caused crashes 2.11  (2016-04-02)

+   allocate large structures on the stack remove white matting for transparent

+   PSD fix reported channel count for PNG & BMP re-enable SSE2 in non-gcc 64-bit

+						 support RGB-formatted JPEG

+						 read 16-bit PNGs (only as 8-bit)

+	  2.10  (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED

+	  2.09  (2016-01-16) allow comments in PNM files

+						 16-bit-per-pixel TGA (not bit-per-component)

+						 info() for TGA could break due to .hdr handling

+						 info() for BMP now shares code instead of sloppy parse

+						 can use STBI_REALLOC_SIZED if allocator doesn't support

+   realloc code cleanup 2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD

+   as RGBA 2.07  (2015-09-13) fix compiler warnings partial animated GIF support

+						 limited 16-bpc PSD support

+						 #ifdef unused functions

+						 bug with < 92 byte PIC,PNM,HDR,TGA

+	  2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value

+	  2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning

+	  2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit

+	  2.03  (2015-04-12) extra corruption checking (mmozeiko)

+						 stbi_set_flip_vertically_on_load (nguillemot)

+						 fix NEON support; fix mingw support

+	  2.02  (2015-01-19) fix incorrect assert, fix warning

+	  2.01  (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit

+   without -msse2 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG 2.00

+   (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg) progressive

+   JPEG (stb) PGM/PPM support (Ken Miller) STBI_MALLOC,STBI_REALLOC,STBI_FREE

+						 GIF bugfix -- seemingly never worked

+						 STBI_NO_*, STBI_ONLY_*

+	  1.48  (2014-12-14) fix incorrectly-named assert()

+	  1.47  (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar

+   Cornut & stb) optimize PNG (ryg) fix bug in interlaced PNG with

+   user-specified channel count (stb) 1.46  (2014-08-26) fix broken tRNS chunk

+   (colorkey-style transparency) in non-paletted PNG 1.45  (2014-08-16) fix

+   MSVC-ARM internal compiler error by wrapping malloc 1.44  (2014-08-07)

+			  various warning fixes from Ronny Chevalier

+	  1.43  (2014-07-15)

+			  fix MSVC-only compiler problem in code changed in 1.42

+	  1.42  (2014-07-09)

+			  don't define _CRT_SECURE_NO_WARNINGS (affects user code)

+			  fixes to stbi__cleanup_jpeg path

+			  added STBI_ASSERT to avoid requiring assert.h

+	  1.41  (2014-06-25)

+			  fix search&replace from 1.36 that messed up comments/error

+   messages 1.40  (2014-06-22) fix gcc struct-initialization warning 1.39

+   (2014-06-15) fix to TGA optimization when req_comp != number of components in

+   TGA; fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my

+   test suite) add support for BMP version 5 (more ignored fields) 1.38

+   (2014-06-06) suppress MSVC warnings on integer casts truncating values fix

+   accidental rename of 'skip' field of I/O 1.37  (2014-06-04) remove duplicate

+   typedef 1.36  (2014-06-03) convert to header file single-file library if

+   de-iphone isn't set, load iphone images color-swapped instead of returning

+   NULL 1.35  (2014-05-27) various warnings fix broken STBI_SIMD path fix bug

+   where stbi_load_from_file no longer left file pointer in correct place fix

+   broken non-easy path for 32-bit BMP (possibly never used) TGA optimization by

+   Arseny Kapoulkine 1.34  (unknown) use STBI_NOTUSED in

+   stbi__resample_row_generic(), fix one more leak in tga failure case 1.33

+   (2011-07-14) make stbi_is_hdr work in STBI_NO_HDR (as specified), minor

+   compiler-friendly improvements 1.32  (2011-07-13) support for "info" function

+   for all supported filetypes (SpartanJ) 1.31  (2011-06-20) a few more leak

+   fixes, bug in PNG handling (SpartanJ) 1.30  (2011-06-11) added ability to

+   load files via callbacks to accomidate custom input streams (Ben Wenger)

+			  removed deprecated format-specific test/load functions

+			  removed support for installable file formats (stbi_loader) --

+   would have been broken for IO callbacks anyway error cases in bmp and tga

+   give messages and don't leak (Raymond Barbiero, grisha) fix inefficiency in

+   decoding 32-bit BMP (David Woo) 1.29  (2010-08-16) various warning fixes from

+   Aurelien Pocheville 1.28  (2010-08-01) fix bug in GIF palette transparency

+   (SpartanJ) 1.27  (2010-08-01) cast-to-stbi_uc to fix warnings 1.26

+   (2010-07-24) fix bug in file buffering for PNG reported by SpartanJ 1.25

+   (2010-07-17) refix trans_data warning (Won Chun) 1.24  (2010-07-12) perf

+   improvements reading from files on platforms with lock-heavy fgetc() minor

+   perf improvements for jpeg deprecated type-specific functions so we'll get

+   feedback if they're needed attempt to fix trans_data warning (Won Chun) 1.23

+   fixed bug in iPhone support 1.22  (2010-07-10) removed image *writing*

+   support stbi_info support from Jetro Lauha GIF support from Jean-Marc Lienher

+			  iPhone PNG-extensions from James Brown

+			  warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err.

+   Janez (U+017D)emva) 1.21    fix use of 'stbi_uc' in header (reported by jon

+   blow) 1.20    added support for Softimage PIC, by Tom Seddon 1.19    bug in

+   interlaced PNG corruption check (found by ryg) 1.18  (2008-08-02) fix a

+   threading bug (local mutable static) 1.17    support interlaced PNG 1.16

+   major bugfix - stbi__convert_format converted one too many pixels 1.15

+   initialize some fields for thread safety 1.14    fix threadsafe conversion

+   bug header-file-only version (#define STBI_HEADER_FILE_ONLY before including)

+	  1.13    threadsafe

+	  1.12    const qualifiers in the API

+	  1.11    Support installable IDCT, colorspace conversion routines

+	  1.10    Fixes for 64-bit (don't use "unsigned long")

+			  optimized upsampling by Fabian "ryg" Giesen

+	  1.09    Fix format-conversion for PSD code (bad global variables!)

+	  1.08    Thatcher Ulrich's PSD code integrated by Nicolas Schulz

+	  1.07    attempt to fix C++ warning/errors again

+	  1.06    attempt to fix C++ warning/errors again

+	  1.05    fix TGA loading to return correct *comp and use good luminance

+   calc 1.04    default float alpha is 1, not 255; use 'void *' for

+   stbi_image_free 1.03    bugfixes to STBI_NO_STDIO, STBI_NO_HDR 1.02 support

+   for (subset of) HDR files, float interface for preferred access to them 1.01

+   fix bug: possible bug in handling right-side up bmps... not sure fix bug: the

+   stbi__bmp_load() and stbi__tga_load() functions didn't work at all 1.00

+   interface to zlib that skips zlib header 0.99    correct handling of alpha in

+   palette 0.98    TGA loader by lonesock; dynamically add loaders (untested)

+	  0.97    jpeg errors on too large a file; also catch another malloc failure

+	  0.96    fix detection of invalid v value - particleman@mollyrocket forum

+	  0.95    during header scan, seek to markers in case of padding

+	  0.94    STBI_NO_STDIO to disable stdio usage; rename all #defines the same

+	  0.93    handle jpegtran output; verbose errors

+	  0.92    read 4,8,16,24,32-bit BMP files of several formats

+	  0.91    output 24-bit Windows 3.0 BMP files

+	  0.90    fix a few more warnings; bump version number to approach 1.0

+	  0.61    bugfixes due to Marc LeBlanc, Christopher Lloyd

+	  0.60    fix compiling as c++

+	  0.59    fix warnings: merge Dave Moore's -Wall fixes

+	  0.58    fix bug: zlib uncompressed mode len/nlen was wrong endian

+	  0.57    fix bug: jpg last huffman symbol before marker was >9 bits but

+   less than 16 available 0.56    fix bug: zlib uncompressed mode len vs. nlen

+	  0.55    fix bug: restart_interval not initialized to 0

+	  0.54    allow NULL for 'int *comp'

+	  0.53    fix bug in png 3->4; speedup png decoding

+	  0.52    png handles req_comp=3,4 directly; minor cleanup; jpeg comments

+	  0.51    obey req_comp requests, 1-component jpegs return as 1-component,

+			  on 'test' only check type, not whether we support this variant

+	  0.50  (2006-11-19)

+			  first released version

+*/

+/*

+------------------------------------------------------------------------------

+This software is available under 2 licenses -- choose whichever you prefer.

+------------------------------------------------------------------------------

+ALTERNATIVE A - MIT License

+Copyright (c) 2017 Sean Barrett

+Permission is hereby granted, free of charge, to any person obtaining a copy of

+this software and associated documentation files (the "Software"), to deal in

+the Software without restriction, including without limitation the rights to

+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies

+of the Software, and to permit persons to whom the Software is furnished to do

+so, subject to the following conditions:

+The above copyright notice and this permission notice shall be included in all

+copies or substantial portions of the Software.

+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

+SOFTWARE.

+------------------------------------------------------------------------------

+ALTERNATIVE B - Public Domain (www.unlicense.org)

+This is free and unencumbered software released into the public domain.

+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this

+software, either in source code form or as a compiled binary, for any purpose,

+commercial or non-commercial, and by any means.

+In jurisdictions that recognize copyright laws, the author or authors of this

+software dedicate any and all copyright interest in the software to the public

+domain. We make this dedication for the benefit of the public at large and to

+the detriment of our heirs and successors. We intend this dedication to be an

+overt act of relinquishment in perpetuity of all present and future rights to

+this software under copyright law.

+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN

+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION

+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

+------------------------------------------------------------------------------

+*/

--- /dev/null

+++ b/include-demo/stb_image_write.h

@@ -1,0 +1,1733 @@

+/* stb_image_write - v1.14 - public domain - http://nothings.org/stb

+   writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015

+									 no warranty implied; use at your own risk

+   Before #including,

+	   #define STB_IMAGE_WRITE_IMPLEMENTATION

+   in the file that you want to have the implementation.

+   Will probably not work correctly with strict-aliasing optimizations.

+ABOUT:

+   This header file is a library for writing images to C stdio or a callback.

+   The PNG output is not optimal; it is 20-50% larger than the file

+   written by a decent optimizing implementation; though providing a custom

+   zlib compress function (see STBIW_ZLIB_COMPRESS) can mitigate that.

+   This library is designed for source code compactness and simplicity,

+   not optimal image file size or run-time performance.

+BUILDING:

+   You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h.

+   You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace

+   malloc,realloc,free.

+   You can #define STBIW_MEMMOVE() to replace memmove()

+   You can #define STBIW_ZLIB_COMPRESS to use a custom zlib-style compress

+function for PNG compression (instead of the builtin one), it must have the

+following signature: unsigned char * my_compress(unsigned char *data, int

+data_len, int *out_len, int quality); The returned data will be freed with

+STBIW_FREE() (free() by default), so it must be heap allocated with

+STBIW_MALLOC() (malloc() by default),

+UNICODE:

+   If compiling for Windows and you wish to use Unicode filenames, compile

+   with

+	   #define STBI_WINDOWS_UTF8

+   and pass utf8-encoded filenames. Call stbiw_convert_wchar_to_utf8 to convert

+   Windows wchar_t filenames to utf8.

+USAGE:

+   There are five functions, one for each image file format:

+	 int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes);

+	 int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data);

+	 int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data);

+	 int stbi_write_jpg(char const *filename, int w, int h, int comp, const void *data, int quality);

+	 int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data);

+	 void stbi_flip_vertically_on_write(int flag); // flag is non-zero to flip data vertically

+   There are also five equivalent functions that use an arbitrary write

+function. You are expected to open/close your file-equivalent before and after

+calling these:

+	 int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void  *data, int stride_in_bytes);

+	 int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void  *data);

+	 int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void  *data);

+	 int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data);

+	 int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality);

+   where the callback is:

+	  void stbi_write_func(void *context, void *data, int size);

+   You can configure it with these global variables:

+	  int stbi_write_tga_with_rle;             // defaults to true; set to 0 to disable RLE

+	  int stbi_write_png_compression_level;    // defaults to 8; set to higher for more compression

+	  int stbi_write_force_png_filter;         // defaults to -1; set to 0..5 to force a filter mode

+   You can define STBI_WRITE_NO_STDIO to disable the file variant of these

+   functions, so the library will not use stdio.h at all. However, this will

+   also disable HDR writing, because it requires stdio for formatted output.

+   Each function returns 0 on failure and non-0 on success.

+   The functions create an image file defined by the parameters. The image

+   is a rectangle of pixels stored from left-to-right, top-to-bottom.

+   Each pixel contains 'comp' channels of data stored interleaved with 8-bits

+   per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is

+   monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall.

+   The *data pointer points to the first byte of the top-left-most pixel.

+   For PNG, "stride_in_bytes" is the distance in bytes from the first byte of

+   a row of pixels to the first byte of the next row of pixels.

+   PNG creates output files with the same number of components as the input.

+   The BMP format expands Y to RGB in the file format and does not

+   output alpha.

+   PNG supports writing rectangles of data even when the bytes storing rows of

+   data are not consecutive in memory (e.g. sub-rectangles of a larger image),

+   by supplying the stride between the beginning of adjacent rows. The other

+   formats do not. (Thus you cannot write a native-format BMP through the BMP

+   writer, both because it is in BGR order and because it may have padding

+   at the end of the line.)

+   PNG allows you to set the deflate compression level by setting the global

+   variable 'stbi_write_png_compression_level' (it defaults to 8).

+   HDR expects linear float data. Since the format is always 32-bit rgb(e)

+   data, alpha (if provided) is discarded, and for monochrome data it is

+   replicated across all three channels.

+   TGA supports RLE or non-RLE compressed data. To use non-RLE-compressed

+   data, set the global variable 'stbi_write_tga_with_rle' to 0.

+   JPEG does ignore alpha channels in input data; quality is between 1 and 100.

+   Higher quality looks better but results in a bigger image.

+   JPEG baseline (no JPEG progressive).

+CREDITS:

+   Sean Barrett           -    PNG/BMP/TGA

+   Baldur Karlsson        -    HDR

+   Jean-Sebastien Guay    -    TGA monochrome

+   Tim Kelsey             -    misc enhancements

+   Alan Hickman           -    TGA RLE

+   Emmanuel Julien        -    initial file IO callback implementation

+   Jon Olick              -    original jo_jpeg.cpp code

+   Daniel Gibson          -    integrate JPEG, allow external zlib

+   Aarni Koskela          -    allow choosing PNG filter

+   bugfixes:

+	  github:Chribba

+	  Guillaume Chereau

+	  github:jry2

+	  github:romigrou

+	  Sergio Gonzalez

+	  Jonas Karlsson

+	  Filip Wasil

+	  Thatcher Ulrich

+	  github:poppolopoppo

+	  Patrick Boettcher

+	  github:xeekworx

+	  Cap Petschulat

+	  Simon Rodriguez

+	  Ivan Tikhonov

+	  github:ignotion

+	  Adam Schackart

+LICENSE

+  See end of file for license information.

+*/

+#ifndef INCLUDE_STB_IMAGE_WRITE_H

+#define INCLUDE_STB_IMAGE_WRITE_H

+#include <stdlib.h>

+// if STB_IMAGE_WRITE_STATIC causes problems, try defining STBIWDEF to 'inline'

+// or 'static inline'

+#ifndef STBIWDEF

+#ifdef STB_IMAGE_WRITE_STATIC

+#define STBIWDEF static

+#else

+#ifdef __cplusplus

+#define STBIWDEF extern "C"

+#else

+#define STBIWDEF extern

+#endif

+#endif

+#endif

+#ifndef STB_IMAGE_WRITE_STATIC // C++ forbids static forward declarations

+extern int stbi_write_tga_with_rle;

+extern int stbi_write_png_compression_level;

+extern int stbi_write_force_png_filter;

+#endif

+#ifndef STBI_WRITE_NO_STDIO

+STBIWDEF int stbi_write_png(char const* filename, int w, int h, int comp, const void* data, int stride_in_bytes);

+STBIWDEF int stbi_write_bmp(char const* filename, int w, int h, int comp, const void* data);

+STBIWDEF int stbi_write_tga(char const* filename, int w, int h, int comp, const void* data);

+STBIWDEF int stbi_write_hdr(char const* filename, int w, int h, int comp, const float* data);

+STBIWDEF int stbi_write_jpg(char const* filename, int x, int y, int comp, const void* data, int quality);

+#ifdef STBI_WINDOWS_UTF8

+STBIWDEF int stbiw_convert_wchar_to_utf8(char* buffer, size_t bufferlen, const wchar_t* input);

+#endif

+#endif

+typedef void stbi_write_func(void* context, void* data, int size);

+STBIWDEF int stbi_write_png_to_func(stbi_write_func* func, void* context, int w, int h, int comp, const void* data, int stride_in_bytes);

+STBIWDEF int stbi_write_bmp_to_func(stbi_write_func* func, void* context, int w, int h, int comp, const void* data);

+STBIWDEF int stbi_write_tga_to_func(stbi_write_func* func, void* context, int w, int h, int comp, const void* data);

+STBIWDEF int stbi_write_hdr_to_func(stbi_write_func* func, void* context, int w, int h, int comp, const float* data);

+STBIWDEF int stbi_write_jpg_to_func(stbi_write_func* func, void* context, int x, int y, int comp, const void* data, int quality);

+STBIWDEF void stbi_flip_vertically_on_write(int flip_boolean);

+#endif // INCLUDE_STB_IMAGE_WRITE_H

+#ifdef STB_IMAGE_WRITE_IMPLEMENTATION

+#ifdef _WIN32

+#ifndef _CRT_SECURE_NO_WARNINGS

+#define _CRT_SECURE_NO_WARNINGS

+#endif

+#ifndef _CRT_NONSTDC_NO_DEPRECATE

+#define _CRT_NONSTDC_NO_DEPRECATE

+#endif

+#endif

+#ifndef STBI_WRITE_NO_STDIO

+#include <stdio.h>

+#endif // STBI_WRITE_NO_STDIO

+#include <math.h>

+#include <stdarg.h>

+#include <stdlib.h>

+#include <string.h>

+#if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || defined(STBIW_REALLOC_SIZED))

+// ok

+#elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) && !defined(STBIW_REALLOC_SIZED)

+// ok

+#else

+#error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC (or STBIW_REALLOC_SIZED)."

+#endif

+#ifndef STBIW_MALLOC

+#define STBIW_MALLOC(sz) malloc(sz)

+#define STBIW_REALLOC(p, newsz) realloc(p, newsz)

+#define STBIW_FREE(p) free(p)

+#endif

+#ifndef STBIW_REALLOC_SIZED

+#define STBIW_REALLOC_SIZED(p, oldsz, newsz) STBIW_REALLOC(p, newsz)

+#endif

+#ifndef STBIW_MEMMOVE

+#define STBIW_MEMMOVE(a, b, sz) memmove(a, b, sz)

+#endif

+#ifndef STBIW_ASSERT

+#include <assert.h>

+#define STBIW_ASSERT(x) assert(x)

+#endif

+#define STBIW_UCHAR(x) (unsigned char)((x)&0xff)

+#ifdef STB_IMAGE_WRITE_STATIC

+static int stbi_write_png_compression_level = 8;

+static int stbi_write_tga_with_rle = 1;

+static int stbi_write_force_png_filter = -1;

+#else

+int stbi_write_png_compression_level = 8;

+int stbi_write_tga_with_rle = 1;

+int stbi_write_force_png_filter = -1;

+#endif

+static int stbi__flip_vertically_on_write = 0;

+// Record the caller's flip flag; the pixel writers in this file read it to

+// choose the row order.

+STBIWDEF void stbi_flip_vertically_on_write(int flag) {

+	stbi__flip_vertically_on_write = flag;

+}

+typedef struct { // output sink shared by all writers in this file

+	stbi_write_func* func; // callback invoked as func(context, data, size) for each chunk of output

+	void* context; // opaque pointer passed back as the first argument of func

+} stbi__write_context;

+// bind an output callback and its opaque user pointer to a write context

+static void stbi__start_write_callbacks(stbi__write_context* s, stbi_write_func* c, void* context) {

+	s->context = context;

+	s->func = c;

+}

+#ifndef STBI_WRITE_NO_STDIO

+// stbi_write_func adapter for stdio: `context` is the FILE* to write into

+static void stbi__stdio_write(void* context, void* data, int size) {

+	FILE* out = (FILE*)context;

+	fwrite(data, 1, size, out);

+}

+#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)

+#ifdef __cplusplus

+#define STBIW_EXTERN extern "C"

+#else

+#define STBIW_EXTERN extern

+#endif

+STBIW_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char* str, int cbmb, wchar_t* widestr,

+																	 int cchwide);

+STBIW_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t* widestr, int cchwide, char* str,

+																	 int cbmb, const char* defchar, int* used_default);

+// Convert a NUL-terminated wide string to UTF-8 in `buffer` (capacity

+// `bufferlen`); returns WideCharToMultiByte()'s result unchanged.

+STBIWDEF int stbiw_convert_wchar_to_utf8(char* buffer, size_t bufferlen, const wchar_t* input) {

+	const unsigned int utf8_codepage = 65001;

+	const int capacity = (int)bufferlen;

+	return WideCharToMultiByte(utf8_codepage, 0, input, -1, buffer, capacity, NULL, NULL);

+}

+#endif

+// Open `filename` with the given fopen-style `mode`; returns NULL (0) on failure.

+// On MSVC with STBI_WINDOWS_UTF8 defined, both strings are treated as UTF-8 and

+// converted to wide strings so the wide-character open functions can be used.

+static FILE* stbiw__fopen(char const* filename, char const* mode) {

+	FILE* f;

+#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)

+	wchar_t wMode[64];

+	wchar_t wFilename[1024];

+	// MultiByteToWideChar expects the destination capacity in wchar_t elements,

+	// not bytes; sizeof(array) alone would overstate it and permit an overrun.

+	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename) / sizeof(*wFilename)))

+		return 0;

+	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode) / sizeof(*wMode)))

+		return 0;

+#if _MSC_VER >= 1400

+	if (0 != _wfopen_s(&f, wFilename, wMode))

+		f = 0;

+#else

+	f = _wfopen(wFilename, wMode);

+#endif

+#elif defined(_MSC_VER) && _MSC_VER >= 1400

+	// the *_s variants report failure through their return value, so normalize

+	// to a NULL stream for the caller

+	if (0 != fopen_s(&f, filename, mode))

+		f = 0;

+#else

+	f = fopen(filename, mode);

+#endif

+	return f;

+}

+// Open `filename` for binary writing and point the context at it via the

+// stdio adapter. Returns 1 if the file was opened, 0 otherwise (the context

+// is initialized either way).

+static int stbi__start_write_file(stbi__write_context* s, const char* filename) {

+	FILE* out = stbiw__fopen(filename, "wb");

+	stbi__start_write_callbacks(s, stbi__stdio_write, (void*)out);

+	if (out == NULL)

+		return 0;

+	return 1;

+}

+// close the FILE* that stbi__start_write_file stored in the context

+static void stbi__end_write_file(stbi__write_context* s) {

+	FILE* out = (FILE*)s->context;

+	fclose(out);

+}

+#endif // !STBI_WRITE_NO_STDIO

+typedef unsigned int stbiw_uint32; // used below wherever a 4-byte little-endian field is emitted

+typedef int stb_image_write_test[sizeof(stbiw_uint32) == 4 ? 1 : -1]; // compile-time check: the array size is -1 (an error) unless stbiw_uint32 is exactly 4 bytes

+// Emit header fields described by `fmt`, pulling one int from `v` per field:

+//   '1' -> low byte only, '2' -> two bytes little-endian,

+//   '4' -> four bytes little-endian, ' ' -> ignored separator.

+// Any other character trips STBIW_ASSERT and stops processing.

+static void stbiw__writefv(stbi__write_context* s, const char* fmt, va_list v) {

+	const char* cursor;

+	for (cursor = fmt; *cursor; ++cursor) {

+		const char code = *cursor;

+		if (code == ' ') {

+			continue;

+		} else if (code == '1') {

+			unsigned char byte = STBIW_UCHAR(va_arg(v, int));

+			s->func(s->context, &byte, 1);

+		} else if (code == '2') {

+			int value = va_arg(v, int);

+			unsigned char le[2];

+			le[0] = STBIW_UCHAR(value);

+			le[1] = STBIW_UCHAR(value >> 8);

+			s->func(s->context, le, 2);

+		} else if (code == '4') {

+			stbiw_uint32 value = va_arg(v, int);

+			unsigned char le[4];

+			le[0] = STBIW_UCHAR(value);

+			le[1] = STBIW_UCHAR(value >> 8);

+			le[2] = STBIW_UCHAR(value >> 16);

+			le[3] = STBIW_UCHAR(value >> 24);

+			s->func(s->context, le, 4);

+		} else {

+			STBIW_ASSERT(0);

+			return;

+		}

+	}

+}

+// variadic front end for stbiw__writefv

+static void stbiw__writef(stbi__write_context* s, const char* fmt, ...) {

+	va_list fields;

+	va_start(fields, fmt);

+	stbiw__writefv(s, fmt, fields);

+	va_end(fields);

+}

+// send a single byte to the context's output callback

+static void stbiw__putc(stbi__write_context* s, unsigned char c) {

+	unsigned char* one = &c;

+	s->func(s->context, one, 1);

+}

+// send three bytes, in argument order, with a single callback invocation

+static void stbiw__write3(stbi__write_context* s, unsigned char a, unsigned char b, unsigned char c) {

+	unsigned char triple[3] = {a, b, c};

+	s->func(s->context, triple, 3);

+}

+// Write one pixel located at `d` with `comp` channels.

+//   write_alpha < 0 : last channel is emitted before the colour bytes

+//   write_alpha > 0 : last channel is emitted after the colour bytes

+//   write_alpha == 0: no alpha byte; 4-channel input is blended onto a

+//                     (255, 0, 255) background instead

+// rgb_dir selects the colour byte order (indices 1 - rgb_dir, 1, 1 + rgb_dir);

+// expand_mono replicates a 1/2-channel luminance value into three bytes.

+static void stbiw__write_pixel(stbi__write_context* s, int rgb_dir, int comp, int write_alpha, int expand_mono, unsigned char* d) {

+	const int first = 1 - rgb_dir;

+	const int last = 1 + rgb_dir;

+	if (write_alpha < 0)

+		s->func(s->context, &d[comp - 1], 1);

+	if (comp == 1 || comp == 2) {

+		// for comp == 2 the alpha byte is handled separately, so only the

+		// luminance is written here

+		if (expand_mono)

+			stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp

+		else

+			s->func(s->context, d, 1); // monochrome TGA

+	} else if (comp == 4 && !write_alpha) {

+		// composite against pink background

+		unsigned char bg[3] = {255, 0, 255}, px[3];

+		int k;

+		for (k = 0; k < 3; ++k)

+			px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255;

+		stbiw__write3(s, px[first], px[1], px[last]);

+	} else if (comp == 3 || comp == 4) {

+		stbiw__write3(s, d[first], d[1], d[last]);

+	}

+	if (write_alpha > 0)

+		s->func(s->context, &d[comp - 1], 1);

+}

+// Write all rows of an x-by-y image. A negative `vdir` walks rows from the

+// last to the first, otherwise from the first to the last; the global flip

+// flag negates `vdir`. After each row, `scanline_pad` zero bytes are emitted.

+static void stbiw__write_pixels(stbi__write_context* s, int rgb_dir, int vdir, int x, int y, int comp, void* data, int write_alpha, int scanline_pad,

+								int expand_mono) {

+	stbiw_uint32 padding = 0;

+	int row, col, stop;

+	if (y <= 0)

+		return;

+	if (stbi__flip_vertically_on_write)

+		vdir = -vdir;

+	row = (vdir < 0) ? y - 1 : 0;

+	stop = (vdir < 0) ? -1 : y;

+	while (row != stop) {

+		for (col = 0; col < x; ++col) {

+			unsigned char* px = (unsigned char*)data + (row * x + col) * comp;

+			stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, px);

+		}

+		s->func(s->context, &padding, scanline_pad);

+		row += vdir;

+	}

+}

+// Write a header (format string plus variadic field values, see

+// stbiw__writefv) followed by the pixel data. Returns 0 without writing

+// anything when either dimension is negative, 1 otherwise.

+static int stbiw__outfile(stbi__write_context* s, int rgb_dir, int vdir, int x, int y, int comp, int expand_mono, void* data, int alpha, int pad,

+						  const char* fmt, ...) {

+	va_list header_fields;

+	if (x < 0 || y < 0)

+		return 0;

+	va_start(header_fields, fmt);

+	stbiw__writefv(s, fmt, header_fields);

+	va_end(header_fields);

+	stbiw__write_pixels(s, rgb_dir, vdir, x, y, comp, data, alpha, pad, expand_mono);

+	return 1;

+}

+// Emit a 24-bit BMP: 14-byte file header, 40-byte bitmap header, then rows

+// written in reverse order with each row padded to a multiple of 4 bytes.

+static int stbi_write_bmp_core(stbi__write_context* s, int x, int y, int comp, const void* data) {

+	int pad = (-x * 3) & 3;

+	int file_size = 14 + 40 + (x * 3 + pad) * y;

+	int pixel_offset = 14 + 40;

+	return stbiw__outfile(s, -1, -1, x, y, comp, 1, (void*)data, 0, pad,

+						  "11 4 22 4"

+						  "4 44 22 444444",

+						  'B', 'M', file_size, 0, 0, pixel_offset, // file header

+						  40, x, y, 1, 24, 0, 0, 0, 0, 0, 0);	  // bitmap header

+}

+// BMP output routed through a caller-supplied write callback

+STBIWDEF int stbi_write_bmp_to_func(stbi_write_func* func, void* context, int x, int y, int comp, const void* data) {

+	stbi__write_context sink;

+	stbi__start_write_callbacks(&sink, func, context);

+	return stbi_write_bmp_core(&sink, x, y, comp, data);

+}

+#ifndef STBI_WRITE_NO_STDIO

+// BMP output to a named file; returns 0 if the file cannot be opened,

+// otherwise the result of stbi_write_bmp_core().

+STBIWDEF int stbi_write_bmp(char const* filename, int x, int y, int comp, const void* data) {

+	stbi__write_context sink;

+	int status;

+	if (!stbi__start_write_file(&sink, filename))

+		return 0;

+	status = stbi_write_bmp_core(&sink, x, y, comp, data);

+	stbi__end_write_file(&sink);

+	return status;

+}

+#endif //! STBI_WRITE_NO_STDIO

+static int stbi_write_tga_core(stbi__write_context* s, int x, int y, int comp, void* data) { // write an x-by-y, comp-channel image as TGA through s; returns 0 for negative dimensions, 1 otherwise

+	int has_alpha = (comp == 2 || comp == 4); // even channel counts carry a trailing alpha channel

+	int colorbytes = has_alpha ? comp - 1 : comp; // channels excluding alpha

+	int format = colorbytes < 2 ? 3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3

+	if (y < 0 || x < 0)

+		return 0;

+	if (!stbi_write_tga_with_rle) { // uncompressed path: header plus raw pixels via the shared writer

+		return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void*)data, has_alpha, 0, "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y,

+							  (colorbytes + has_alpha) * 8, has_alpha * 8);

+	} else {

+		int i, j, k;

+		int jend, jdir;

+		stbiw__writef(s, "111 221 2222 11", 0, 0, format + 8, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8); // same header layout as above, but the third byte is format + 8 on this path

+		if (stbi__flip_vertically_on_write) { // flipped: rows are emitted first-to-last

+			j = 0;

+			jend = y;

+			jdir = 1;

+		} else { // default: rows are emitted last-to-first

+			j = y - 1;

+			jend = -1;

+			jdir = -1;

+		}

+		for (; j != jend; j += jdir) {

+			unsigned char* row = (unsigned char*)data + j * x * comp;

+			int len; // number of pixels covered by the packet being built

+			for (i = 0; i < x; i += len) {

+				unsigned char* begin = row + i * comp;

+				int diff = 1; // nonzero -> literal packet, zero -> run packet

+				len = 1;

+				if (i < x - 1) {

+					++len;

+					diff = memcmp(begin, row + (i + 1) * comp, comp); // do pixels i and i+1 differ?

+					if (diff) {

+						const unsigned char* prev = begin; // NOTE(review): prev starts at pixel i, so each test below compares pixel k-2 with pixel k — confirm this is intended

+						for (k = i + 2; k < x && len < 128; ++k) { // extend the literal packet, capped at 128 pixels

+							if (memcmp(prev, row + k * comp, comp)) {

+								prev += comp;

+								++len;

+							} else {

+								--len; // match found: shorten the literal packet by one pixel and stop

+								break;

+							}

+						}

+					} else {

+						for (k = i + 2; k < x && len < 128; ++k) { // extend the run while pixels equal the first one, capped at 128

+							if (!memcmp(begin, row + k * comp, comp)) {

+								++len;

+							} else {

+								break;

+							}

+						}

+					}

+				}

+				if (diff) {

+					unsigned char header = STBIW_UCHAR(len - 1); // literal packet header: pixel count minus one

+					s->func(s->context, &header, 1);

+					for (k = 0; k < len; ++k) {

+						stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp);

+					}

+				} else {

+					unsigned char header = STBIW_UCHAR(len - 129); // run packet header: masked to a byte this equals (len - 1) + 128 for len in 2..128

+					s->func(s->context, &header, 1);

+					stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin); // the repeated pixel is written once

+				}

+			}

+		}

+	}

+	return 1;

+}

+// TGA output routed through a caller-supplied write callback

+STBIWDEF int stbi_write_tga_to_func(stbi_write_func* func, void* context, int x, int y, int comp, const void* data) {

+	stbi__write_context sink;

+	stbi__start_write_callbacks(&sink, func, context);

+	return stbi_write_tga_core(&sink, x, y, comp, (void*)data);

+}

+#ifndef STBI_WRITE_NO_STDIO

+// TGA output to a named file; returns 0 if the file cannot be opened,

+// otherwise the result of stbi_write_tga_core().

+STBIWDEF int stbi_write_tga(char const* filename, int x, int y, int comp, const void* data) {

+	stbi__write_context sink;

+	int status;

+	if (!stbi__start_write_file(&sink, filename))

+		return 0;

+	status = stbi_write_tga_core(&sink, x, y, comp, (void*)data);

+	stbi__end_write_file(&sink);

+	return status;

+}

+#endif

+// *************************************************************************************************

+// Radiance RGBE HDR writer

+// by Baldur Karlsson

+#define stbiw__max(a, b) ((a) > (b) ? (a) : (b))

+// Pack three float components into 4 bytes: three mantissa bytes scaled by a

+// common factor derived from the largest component, plus a biased exponent.

+// When the largest component is below 1e-32 all four bytes are zero.

+static void stbiw__linear_to_rgbe(unsigned char* rgbe, float* linear) {

+	int exponent;

+	int ch;

+	float scale;

+	float peak = linear[1] > linear[2] ? linear[1] : linear[2];

+	if (linear[0] > peak)

+		peak = linear[0];

+	if (peak < 1e-32f) {

+		rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0;

+		return;

+	}

+	scale = (float)frexp(peak, &exponent) * 256.0f / peak;

+	for (ch = 0; ch < 3; ++ch)

+		rgbe[ch] = (unsigned char)(linear[ch] * scale);

+	rgbe[3] = (unsigned char)(exponent + 128);

+}

+// Emit one run record: a count byte of (length + 128) followed by the single

+// byte value that is repeated.

+static void stbiw__write_run_data(stbi__write_context* s, int length, unsigned char databyte) {

+	unsigned char run_tag = STBIW_UCHAR(length + 128);

+	STBIW_ASSERT(length + 128 <= 255);

+	s->func(s->context, &run_tag, 1);

+	s->func(s->context, &databyte, 1);

+}

+// Emit one literal record: a count byte equal to `length` followed by

+// `length` bytes copied from `data`.

+static void stbiw__write_dump_data(stbi__write_context* s, int length, unsigned char* data) {

+	unsigned char count_tag = STBIW_UCHAR(length);

+	STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code

+	s->func(s->context, &count_tag, 1);

+	s->func(s->context, data, length);

+}

+// Write one scanline of `width` pixels in RGBE form through s->func.

+// `scanline` holds `ncomp` floats per pixel: with ncomp 3 or 4 the first three

+// floats are used as the colour, otherwise the first float is replicated into

+// all three channels. Widths below 8 or >= 32768 are written as plain 4-byte

+// RGBE pixels. Any other width is written as a 4-byte scanline header followed

+// by the four byte planes, each run-length encoded on its own; the planes are

+// staged in `scratch`, which is indexed up to width * 4 - 1.

+static void stbiw__write_hdr_scanline(stbi__write_context* s, int width, int ncomp, unsigned char* scratch, float* scanline) {

+	unsigned char scanlineheader[4] = {2, 2, 0, 0};

+	unsigned char rgbe[4];

+	float linear[3];

+	int x;

+	// bytes 2 and 3 of the header carry the width, high byte first

+	scanlineheader[2] = (width & 0xff00) >> 8;

+	scanlineheader[3] = (width & 0x00ff);

+	/* skip RLE for images too small or large */

+	if (width < 8 || width >= 32768) {

+		for (x = 0; x < width; x++) {

+			switch (ncomp) {

+			case 4: /* fallthrough */

+			case 3:

+				linear[2] = scanline[x * ncomp + 2];

+				linear[1] = scanline[x * ncomp + 1];

+				linear[0] = scanline[x * ncomp + 0];

+				break;

+			default:

+				linear[0] = linear[1] = linear[2] = scanline[x * ncomp + 0];

+				break;

+			}

+			stbiw__linear_to_rgbe(rgbe, linear);

+			s->func(s->context, rgbe, 4);

+		}

+	} else {

+		int c, r;

+		/* encode into scratch buffer */

+		for (x = 0; x < width; x++) {

+			switch (ncomp) {

+			case 4: /* fallthrough */

+			case 3:

+				linear[2] = scanline[x * ncomp + 2];

+				linear[1] = scanline[x * ncomp + 1];

+				linear[0] = scanline[x * ncomp + 0];

+				break;

+			default:

+				linear[0] = linear[1] = linear[2] = scanline[x * ncomp + 0];

+				break;

+			}

+			stbiw__linear_to_rgbe(rgbe, linear);

+			// planar layout: all first bytes, then all second bytes, and so on

+			scratch[x + width * 0] = rgbe[0];

+			scratch[x + width * 1] = rgbe[1];

+			scratch[x + width * 2] = rgbe[2];

+			scratch[x + width * 3] = rgbe[3];

+		}

+		s->func(s->context, scanlineheader, 4);

+		/* RLE each component separately */

+		for (c = 0; c < 4; c++) {

+			unsigned char* comp = &scratch[width * c];

+			x = 0;

+			while (x < width) {

+				// find first run

+				// (a run is three or more equal consecutive bytes)

+				r = x;

+				while (r + 2 < width) {

+					if (comp[r] == comp[r + 1] && comp[r] == comp[r + 2])

+						break;

+					++r;

+				}

+				// no run found: treat the rest of the plane as literals

+				if (r + 2 >= width)

+					r = width;

+				// dump up to first run

+				// (in literal records of at most 128 bytes each)

+				while (x < r) {

+					int len = r - x;

+					if (len > 128)

+						len = 128;

+					stbiw__write_dump_data(s, len, &comp[x]);

+					x += len;

+				}

+				// if there's a run, output it

+				if (r + 2 < width) { // same test as what we break out of in

+									 // search loop, so only true if we break'd

+					// find next byte after run

+					while (r < width && comp[r] == comp[x])

+						++r;

+					// output run up to r

+					// (in run records of at most 127 repeats each)

+					while (x < r) {

+						int len = r - x;

+						if (len > 127)

+							len = 127;

+						stbiw__write_run_data(s, len, comp[x]);

+						x += len;

+					}

+				}

+			}

+		}

+	}

+}

+// Write a complete RGBE .hdr image of x by y pixels with `comp` floats per

+// pixel: the text header first, then one encoded scanline per row (rows are

+// taken bottom-up when stbi__flip_vertically_on_write is set).

+// Returns 1 on success, and 0 when a dimension is not positive, `data` is

+// NULL, or the per-scanline scratch buffer cannot be allocated.

+static int stbi_write_hdr_core(stbi__write_context* s, int x, int y, int comp, float* data) {

+	if (y <= 0 || x <= 0 || data == NULL)

+		return 0;

+	else {

+		// Each component is stored separately. Allocate scratch space for full

+		// output scanline.

+		unsigned char* scratch = (unsigned char*)STBIW_MALLOC(x * 4);

+		int i, len;

+		char buffer[128];

+		char header[] = "#?RADIANCE\n# Written by "

+						"stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n";

+		// Fail before emitting anything: the scanline writer stores through

+		// scratch without checking it.

+		if (scratch == NULL)

+			return 0;

+		s->func(s->context, header, sizeof(header) - 1);

+#ifdef __STDC_WANT_SECURE_LIB__

+		len = sprintf_s(buffer, sizeof(buffer), "EXPOSURE=          1.0000000000000\n\n-Y %d +X %d\n", y, x);

+#else

+		len = sprintf(buffer, "EXPOSURE=          1.0000000000000\n\n-Y %d +X %d\n", y, x);

+#endif

+		s->func(s->context, buffer, len);

+		for (i = 0; i < y; i++)

+			stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp * x * (stbi__flip_vertically_on_write ? y - 1 - i : i));

+		STBIW_FREE(scratch);

+		return 1;

+	}

+}

+// Encode an HDR image and deliver the output through the caller's callback.

+// Returns whatever stbi_write_hdr_core() returns.

+STBIWDEF int stbi_write_hdr_to_func(stbi_write_func* func, void* context, int x, int y, int comp, const float* data) {

+	stbi__write_context writer;

+	int status;

+	stbi__start_write_callbacks(&writer, func, context);

+	// the core routine takes a non-const pointer, hence the cast

+	status = stbi_write_hdr_core(&writer, x, y, comp, (float*)data);

+	return status;

+}

+#ifndef STBI_WRITE_NO_STDIO

+// Encode an HDR image into the file `filename`.

+// Returns 0 when stbi__start_write_file() reports failure, otherwise the

+// result of stbi_write_hdr_core().

+STBIWDEF int stbi_write_hdr(char const* filename, int x, int y, int comp, const float* data) {

+	stbi__write_context writer;

+	int status;

+	if (!stbi__start_write_file(&writer, filename))

+		return 0;

+	status = stbi_write_hdr_core(&writer, x, y, comp, (float*)data);

+	stbi__end_write_file(&writer);

+	return status;

+}

+#endif // STBI_WRITE_NO_STDIO

+//////////////////////////////////////////////////////////////////////////////

+//

+// PNG writer

+//

+#ifndef STBIW_ZLIB_COMPRESS

+// stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount()

+// == vector<>::size()

+// Two ints sit immediately in front of the element pointer `a`:

+// stbiw__sbm(a) is the capacity in elements and stbiw__sbn(a) the number of

+// elements in use. A null `a` is a valid empty buffer: stbiw__sbcount() then

+// yields 0 and the first stbiw__sbpush() allocates it via stbiw__sbgrowf().

+// The macros evaluate `a` more than once, and stbiw__sbpush()/stbiw__sbgrow()

+// assign to `a`, so it must be a plain lvalue.

+#define stbiw__sbraw(a) ((int*)(void*)(a)-2)

+#define stbiw__sbm(a) stbiw__sbraw(a)[0]

+#define stbiw__sbn(a) stbiw__sbraw(a)[1]

+#define stbiw__sbneedgrow(a, n) ((a) == 0 || stbiw__sbn(a) + n >= stbiw__sbm(a))

+#define stbiw__sbmaybegrow(a, n) (stbiw__sbneedgrow(a, (n)) ? stbiw__sbgrow(a, n) : 0)

+#define stbiw__sbgrow(a, n) stbiw__sbgrowf((void**)&(a), (n), sizeof(*(a)))

+#define stbiw__sbpush(a, v) (stbiw__sbmaybegrow(a, 1), (a)[stbiw__sbn(a)++] = (v))

+#define stbiw__sbcount(a) ((a) ? stbiw__sbn(a) : 0)

+#define stbiw__sbfree(a) ((a) ? STBIW_FREE(stbiw__sbraw(a)), 0 : 0)

+// Grow (or first allocate) the stretchy buffer behind *arr so that it can

+// hold `increment` more elements of `itemsize` bytes. An existing buffer gets

+// capacity 2 * old + increment, a new one increment + 1 with a zero count.

+// On success *arr is updated; on allocation failure *arr is left unchanged.

+// Returns *arr either way.

+static void* stbiw__sbgrowf(void** arr, int increment, int itemsize) {

+	void* grown;

+	int capacity;

+	if (*arr) {

+		capacity = 2 * stbiw__sbm(*arr) + increment;

+		grown = STBIW_REALLOC_SIZED(stbiw__sbraw(*arr), (stbiw__sbm(*arr) * itemsize + sizeof(int) * 2), itemsize * capacity + sizeof(int) * 2);

+	} else {

+		capacity = increment + 1;

+		grown = STBIW_REALLOC_SIZED(0, 0, itemsize * capacity + sizeof(int) * 2);

+	}

+	STBIW_ASSERT(grown);

+	if (grown) {

+		// a brand-new buffer starts with zero elements in use

+		if (!*arr)

+			((int*)grown)[1] = 0;

+		// the element pointer sits just past the two-int header

+		*arr = (void*)((int*)grown + 2);

+		stbiw__sbm(*arr) = capacity;

+	}

+	return *arr;

+}

+// Move every complete byte out of the bit accumulator, lowest byte first, by

+// appending it to the stretchy buffer `data`. Returns the (possibly moved)

+// buffer pointer.

+static unsigned char* stbiw__zlib_flushf(unsigned char* data, unsigned int* bitbuffer, int* bitcount) {

+	for (; *bitcount >= 8; *bitcount -= 8) {

+		stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer));

+		*bitbuffer >>= 8;

+	}

+	return data;

+}

+// Return the low `codebits` bits of `code` in reversed bit order.

+static int stbiw__zlib_bitrev(int code, int codebits) {

+	int reversed = 0;

+	for (; codebits != 0; --codebits) {

+		// shift the lowest remaining source bit into the result

+		reversed = (reversed << 1) | (code & 1);

+		code >>= 1;

+	}

+	return reversed;

+}

+// Count how many leading bytes of `a` and `b` are equal, looking at no more

+// than `limit` bytes and never more than 258.

+static unsigned int stbiw__zlib_countm(unsigned char* a, unsigned char* b, int limit) {

+	int matched = 0;

+	while (matched < limit && matched < 258 && a[matched] == b[matched])

+		++matched;

+	return matched;

+}

+// Hash the three bytes at data[0..2]: they are combined into one value, low

+// byte first, and then mixed with a fixed shift/xor/add sequence.

+static unsigned int stbiw__zhash(unsigned char* data) {

+	stbiw_uint32 mixed = data[0] + (data[1] << 8) + (data[2] << 16);

+	mixed = mixed ^ (mixed << 3);

+	mixed = mixed + (mixed >> 5);

+	mixed = mixed ^ (mixed << 4);

+	mixed = mixed + (mixed >> 17);

+	mixed = mixed ^ (mixed << 25);

+	mixed = mixed + (mixed >> 6);

+	return mixed;

+}

+// Bit-output helpers. They read and assign the names `out`, `bitbuf` and

+// `bitcount`, which must be variables in the scope where they are expanded.

+// stbiw__zlib_add() places `code` above the bits already pending (lowest bit

+// is written first) and flushes whole bytes; stbiw__zlib_huffa() bit-reverses

+// the code before adding it.

+#define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount))

+#define stbiw__zlib_add(code, codebits) (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush())

+#define stbiw__zlib_huffa(b, c) stbiw__zlib_add(stbiw__zlib_bitrev(b, c), c)

+// default huffman tables

+// symbols 0..143 use 8-bit codes starting at 0x30, 144..255 use 9-bit codes

+// starting at 0x190, 256..279 use 7-bit codes starting at 0, and 280 upward

+// use 8-bit codes starting at 0xc0

+#define stbiw__zlib_huff1(n) stbiw__zlib_huffa(0x30 + (n), 8)

+#define stbiw__zlib_huff2(n) stbiw__zlib_huffa(0x190 + (n)-144, 9)

+#define stbiw__zlib_huff3(n) stbiw__zlib_huffa(0 + (n)-256, 7)

+#define stbiw__zlib_huff4(n) stbiw__zlib_huffa(0xc0 + (n)-280, 8)

+#define stbiw__zlib_huff(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n))

+// shortcut for byte values (0..255), which only ever need the first two ranges

+#define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n))

+// number of hash buckets; a power of two because hashes are masked with

+// (stbiw__ZHASH - 1)

+#define stbiw__ZHASH 16384

+#endif // STBIW_ZLIB_COMPRESS

+// Compress `data_len` bytes at `data` into a zlib stream: a two-byte header,

+// one final fixed-Huffman DEFLATE block, and an Adler-32 checksum of the input.

+// The stream length is stored in *out_len. `quality` is raised to at least 5;

+// each hash bucket is cut back to its newest `quality` entries once it

+// reaches 2 * quality. Returns NULL when the hash table cannot be allocated.

+// The returned pointer is the start of the underlying allocation (see the

+// final memmove), so the caller releases it with STBIW_FREE.

+// When STBIW_ZLIB_COMPRESS is defined the call is simply forwarded to it.

+STBIWDEF unsigned char* stbi_zlib_compress(unsigned char* data, int data_len, int* out_len, int quality) {

+#ifdef STBIW_ZLIB_COMPRESS

+	// user provided a zlib compress implementation, use that

+	return STBIW_ZLIB_COMPRESS(data, data_len, out_len, quality);

+#else  // use builtin

+	// lengthc/distc: first value covered by each length/distance code;

+	// lengtheb/disteb: number of extra bits that follow that code

+	static unsigned short lengthc[] = {3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 259};

+	static unsigned char lengtheb[] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0};

+	static unsigned short distc[] = {1,   2,   3,   4,   5,	7,	9,	13,   17,   25,   33,   49,	65,	97,	129,  193,

+									 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 32768};

+	static unsigned char disteb[] = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13};

+	unsigned int bitbuf = 0;

+	int i, j, bitcount = 0;

+	unsigned char* out = NULL;

+	// one stretchy list of earlier input positions per hash bucket

+	unsigned char*** hash_table = (unsigned char***)STBIW_MALLOC(stbiw__ZHASH * sizeof(unsigned char**));

+	if (hash_table == NULL)

+		return NULL;

+	if (quality < 5)

+		quality = 5;

+	stbiw__sbpush(out, 0x78); // DEFLATE 32K window

+	stbiw__sbpush(out, 0x5e); // FLEVEL = 1

+	stbiw__zlib_add(1, 1);	// BFINAL = 1

+	stbiw__zlib_add(1, 2);	// BTYPE = 1 -- fixed huffman

+	for (i = 0; i < stbiw__ZHASH; ++i)

+		hash_table[i] = NULL;

+	i = 0;

+	while (i < data_len - 3) {

+		// hash next 3 bytes of data to be compressed

+		int h = stbiw__zhash(data + i) & (stbiw__ZHASH - 1), best = 3;

+		unsigned char* bestloc = 0;

+		unsigned char** hlist = hash_table[h];

+		int n = stbiw__sbcount(hlist);

+		// keep the longest match (at least 3 bytes); ties go to the later entry

+		for (j = 0; j < n; ++j) {

+			if (hlist[j] - data > i - 32768) { // if entry lies within window

+				int d = stbiw__zlib_countm(hlist[j], data + i, data_len - i);

+				if (d >= best) {

+					best = d;

+					bestloc = hlist[j];

+				}

+			}

+		}

+		// when hash table entry is too long, delete half the entries

+		if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2 * quality) {

+			STBIW_MEMMOVE(hash_table[h], hash_table[h] + quality, sizeof(hash_table[h][0]) * quality);

+			stbiw__sbn(hash_table[h]) = quality;

+		}

+		stbiw__sbpush(hash_table[h], data + i);

+		if (bestloc) {

+			// "lazy matching" - check match at *next* byte, and if it's better,

+			// do cur byte as literal

+			h = stbiw__zhash(data + i + 1) & (stbiw__ZHASH - 1);

+			hlist = hash_table[h];

+			n = stbiw__sbcount(hlist);

+			for (j = 0; j < n; ++j) {

+				if (hlist[j] - data > i - 32767) {

+					int e = stbiw__zlib_countm(hlist[j], data + i + 1, data_len - i - 1);

+					if (e > best) { // if next match is better, bail on current

+									// match

+						bestloc = NULL;

+						break;

+					}

+				}

+			}

+		}

+		if (bestloc) {

+			int d = (int)(data + i - bestloc); // distance back

+			STBIW_ASSERT(d <= 32767 && best <= 258);

+			// find the length code whose range contains `best`

+			for (j = 0; best > lengthc[j + 1] - 1; ++j)

+				;

+			stbiw__zlib_huff(j + 257);

+			if (lengtheb[j])

+				stbiw__zlib_add(best - lengthc[j], lengtheb[j]);

+			// find the distance code whose range contains `d`

+			for (j = 0; d > distc[j + 1] - 1; ++j)

+				;

+			stbiw__zlib_add(stbiw__zlib_bitrev(j, 5), 5);

+			if (disteb[j])

+				stbiw__zlib_add(d - distc[j], disteb[j]);

+			i += best;

+		} else {

+			stbiw__zlib_huffb(data[i]);

+			++i;

+		}

+	}

+	// write out final bytes

+	for (; i < data_len; ++i)

+		stbiw__zlib_huffb(data[i]);

+	stbiw__zlib_huff(256); // end of block

+	// pad with 0 bits to byte boundary

+	while (bitcount)

+		stbiw__zlib_add(0, 1);

+	for (i = 0; i < stbiw__ZHASH; ++i)

+		(void)stbiw__sbfree(hash_table[i]);

+	STBIW_FREE(hash_table);

+	{

+		// compute adler32 on input

+		// (the sums are reduced mod 65521 after each chunk; the first chunk

+		// is data_len % 5552 bytes long, every later one 5552 bytes)

+		unsigned int s1 = 1, s2 = 0;

+		int blocklen = (int)(data_len % 5552);

+		j = 0;

+		while (j < data_len) {

+			for (i = 0; i < blocklen; ++i) {

+				s1 += data[j + i];

+				s2 += s1;

+			}

+			s1 %= 65521;

+			s2 %= 65521;

+			j += blocklen;

+			blocklen = 5552;

+		}

+		// checksum is appended as s2 (high byte first) then s1

+		stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8));

+		stbiw__sbpush(out, STBIW_UCHAR(s2));

+		stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8));

+		stbiw__sbpush(out, STBIW_UCHAR(s1));

+	}

+	*out_len = stbiw__sbn(out);

+	// make returned pointer freeable

+	STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len);

+	return (unsigned char*)stbiw__sbraw(out);

+#endif // STBIW_ZLIB_COMPRESS

+}

+// Compute a table-driven CRC over `len` bytes of `buffer`: the accumulator

+// starts as all ones and the result is its complement. When STBIW_CRC32 is

+// defined the call is forwarded to it instead.

+static unsigned int stbiw__crc32(unsigned char* buffer, int len) {

+#ifdef STBIW_CRC32

+	return STBIW_CRC32(buffer, len);

+#else

+	static unsigned int crc_table[256] = {

+		0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, 0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988,

+		0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,

+		0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172,

+		0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,

+		0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924,

+		0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,

+		0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E,

+		0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,

+		0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0,

+		0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,

+		0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A,

+		0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,

+		0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC,

+		0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,

+		0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236,

+		0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,

+		0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38,

+		0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,

+		0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2,

+		0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,

+		0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94,

+		0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D};

+	unsigned int acc = ~0u;

+	const unsigned char* cursor = buffer;

+	int remaining = len;

+	while (remaining > 0) {

+		// fold one input byte into the low byte, then look up the table

+		acc = (acc >> 8) ^ crc_table[*cursor ^ (acc & 0xff)];

+		++cursor;

+		--remaining;

+	}

+	return ~acc;

+#endif

+}

+// Output helpers that store bytes at the cursor `o` and advance it.

+// stbiw__wpng4 stores four bytes and moves `o` forward by 4, so `o` must be

+// an lvalue.

+#define stbiw__wpng4(o, a, b, c, d) ((o)[0] = STBIW_UCHAR(a), (o)[1] = STBIW_UCHAR(b), (o)[2] = STBIW_UCHAR(c), (o)[3] = STBIW_UCHAR(d), (o) += 4)

+// stores `v` as four bytes, most significant first; note the expansion

+// already ends in ';'

+#define stbiw__wp32(data, v) stbiw__wpng4(data, (v) >> 24, (v) >> 16, (v) >> 8, (v));

+// stores the first four characters of the string `s`

+#define stbiw__wptag(data, s) stbiw__wpng4(data, s[0], s[1], s[2], s[3])

+// Append the CRC of the len + 4 bytes that end at the cursor *data, and

+// advance the cursor past the four CRC bytes.

+static void stbiw__wpcrc(unsigned char** data, int len) {

+	unsigned char* span_start = *data - len - 4;

+	unsigned int checksum = stbiw__crc32(span_start, len + 4);

+	stbiw__wp32(*data, checksum);

+}

+// Paeth predictor: of a, b and c, return the one closest to a + b - c.

+// Ties are resolved in the order a, then b, then c.

+static unsigned char stbiw__paeth(int a, int b, int c) {

+	int estimate = a + b - c;

+	int dist_a = abs(estimate - a);

+	int dist_b = abs(estimate - b);

+	int dist_c = abs(estimate - c);

+	int chosen;

+	if (dist_a <= dist_b && dist_a <= dist_c)

+		chosen = a;

+	else if (dist_b <= dist_c)

+		chosen = b;

+	else

+		chosen = c;

+	return STBIW_UCHAR(chosen);

+}

+// @OPTIMIZE: provide an option that always forces left-predict or paeth predict

+// Filter row `y` of the image and store the width * n filtered bytes in

+// line_buffer. `n` is the number of bytes per pixel and `filter_type` is

+// 0..4. For y == 0 the type is remapped (2 -> 0, 3 -> 5, 4 -> 6) to variants

+// that never read a previous row. When stbi__flip_vertically_on_write is set

+// the rows are taken bottom-up and the "previous" row is the one at

+// +stride_bytes.

+static void stbiw__encode_png_line(unsigned char* pixels, int stride_bytes, int width, int height, int y, int n, int filter_type, signed char* line_buffer) {

+	static int mapping[] = {0, 1, 2, 3, 4};

+	static int firstmap[] = {0, 1, 0, 5, 6};

+	int* mymap = (y != 0) ? mapping : firstmap;

+	int i;

+	int type = mymap[filter_type];

+	// z points at the source row; z[i - signed_stride] is the byte in the previously output row

+	unsigned char* z = pixels + stride_bytes * (stbi__flip_vertically_on_write ? height - 1 - y : y);

+	int signed_stride = stbi__flip_vertically_on_write ? -stride_bytes : stride_bytes;

+	// type 0: copy the row unchanged

+	if (type == 0) {

+		memcpy(line_buffer, z, width * n);

+		return;

+	}

+	// first loop isn't optimized since it's just one pixel

+	// (the first pixel has no pixel to its left, so only the row above can contribute)

+	for (i = 0; i < n; ++i) {

+		switch (type) {

+		case 1:

+			line_buffer[i] = z[i];

+			break;

+		case 2:

+			line_buffer[i] = z[i] - z[i - signed_stride];

+			break;

+		case 3:

+			line_buffer[i] = z[i] - (z[i - signed_stride] >> 1);

+			break;

+		case 4:

+			line_buffer[i] = (signed char)(z[i] - stbiw__paeth(0, z[i - signed_stride], 0));

+			break;

+		case 5:

+			line_buffer[i] = z[i];

+			break;

+		case 6:

+			line_buffer[i] = z[i];

+			break;

+		}

+	}

+	// remaining pixels: z[i - n] is the same component of the pixel to the left

+	switch (type) {

+	case 1: // difference to the left byte

+		for (i = n; i < width * n; ++i)

+			line_buffer[i] = z[i] - z[i - n];

+		break;

+	case 2: // difference to the byte in the previous row

+		for (i = n; i < width * n; ++i)

+			line_buffer[i] = z[i] - z[i - signed_stride];

+		break;

+	case 3: // difference to the average of left and previous-row bytes

+		for (i = n; i < width * n; ++i)

+			line_buffer[i] = z[i] - ((z[i - n] + z[i - signed_stride]) >> 1);

+		break;

+	case 4: // difference to the Paeth prediction from left, above and upper-left

+		for (i = n; i < width * n; ++i)

+			line_buffer[i] = z[i] - stbiw__paeth(z[i - n], z[i - signed_stride], z[i - signed_stride - n]);

+		break;

+	case 5: // first-row form of case 3: the row above counts as zero

+		for (i = n; i < width * n; ++i)

+			line_buffer[i] = z[i] - (z[i - n] >> 1);

+		break;

+	case 6: // first-row form of case 4: only the left byte is available

+		for (i = n; i < width * n; ++i)

+			line_buffer[i] = z[i] - stbiw__paeth(z[i - n], 0, 0);

+		break;

+	}

+}

+// Encode an x by y image with n bytes per pixel as a complete PNG file in a

+// newly allocated buffer. `stride_bytes` is the distance between source rows;

+// 0 means tightly packed (x * n). The buffer length is stored in *out_len and

+// the caller releases the buffer with STBIW_FREE. Returns 0 when an

+// allocation or the compression step fails; every intermediate buffer is

+// released on those paths.

+// n indexes ctype[] directly, so it must be in 1..4.

+STBIWDEF unsigned char* stbi_write_png_to_mem(const unsigned char* pixels, int stride_bytes, int x, int y, int n, int* out_len) {

+	int force_filter = stbi_write_force_png_filter;

+	// IHDR colour-type byte for n = 1..4 components

+	int ctype[5] = {-1, 0, 4, 2, 6};

+	unsigned char sig[8] = {137, 80, 78, 71, 13, 10, 26, 10};

+	unsigned char *out, *o, *filt, *zlib;

+	signed char* line_buffer;

+	int j, zlen;

+	if (stride_bytes == 0)

+		stride_bytes = x * n;

+	// values of 5 and above are not filter numbers: fall back to per-row selection

+	if (force_filter >= 5) {

+		force_filter = -1;

+	}

+	// every output row is one filter-type byte followed by x * n data bytes

+	filt = (unsigned char*)STBIW_MALLOC((x * n + 1) * y);

+	if (!filt)

+		return 0;

+	line_buffer = (signed char*)STBIW_MALLOC(x * n);

+	if (!line_buffer) {

+		STBIW_FREE(filt);

+		return 0;

+	}

+	for (j = 0; j < y; ++j) {

+		int filter_type;

+		if (force_filter > -1) {

+			filter_type = force_filter;

+			stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, force_filter, line_buffer);

+		} else { // Estimate the best filter by running through all of them:

+			int best_filter = 0, best_filter_val = 0x7fffffff, est, i;

+			for (filter_type = 0; filter_type < 5; filter_type++) {

+				stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, filter_type, line_buffer);

+				// Estimate the entropy of the line using this filter; the less,

+				// the better.

+				est = 0;

+				for (i = 0; i < x * n; ++i) {

+					est += abs((signed char)line_buffer[i]);

+				}

+				if (est < best_filter_val) {

+					best_filter_val = est;

+					best_filter = filter_type;

+				}

+			}

+			if (filter_type != best_filter) { // If the last iteration already got us

+											  // the best filter, don't redo it

+				stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, best_filter, line_buffer);

+				filter_type = best_filter;

+			}

+		}

+		// when we get here, filter_type contains the filter type, and

+		// line_buffer contains the data

+		filt[j * (x * n + 1)] = (unsigned char)filter_type;

+		STBIW_MEMMOVE(filt + j * (x * n + 1) + 1, line_buffer, x * n);

+	}

+	STBIW_FREE(line_buffer);

+	zlib = stbi_zlib_compress(filt, y * (x * n + 1), &zlen, stbi_write_png_compression_level);

+	STBIW_FREE(filt);

+	if (!zlib)

+		return 0;

+	// each tag requires 12 bytes of overhead

+	out = (unsigned char*)STBIW_MALLOC(8 + 12 + 13 + 12 + zlen + 12);

+	if (!out) {

+		// do not leak the compressed stream when the output buffer is unavailable

+		STBIW_FREE(zlib);

+		return 0;

+	}

+	*out_len = 8 + 12 + 13 + 12 + zlen + 12;

+	o = out;

+	STBIW_MEMMOVE(o, sig, 8);

+	o += 8;

+	stbiw__wp32(o, 13); // header length

+	stbiw__wptag(o, "IHDR");

+	stbiw__wp32(o, x);

+	stbiw__wp32(o, y);

+	*o++ = 8; // bit depth

+	*o++ = STBIW_UCHAR(ctype[n]);

+	*o++ = 0;

+	*o++ = 0;

+	*o++ = 0;

+	stbiw__wpcrc(&o, 13);

+	stbiw__wp32(o, zlen);

+	stbiw__wptag(o, "IDAT");

+	STBIW_MEMMOVE(o, zlib, zlen);

+	o += zlen;

+	STBIW_FREE(zlib);

+	stbiw__wpcrc(&o, zlen);

+	stbiw__wp32(o, 0);

+	stbiw__wptag(o, "IEND");

+	stbiw__wpcrc(&o, 0);

+	STBIW_ASSERT(o == out + *out_len);

+	return out;

+}

+#ifndef STBI_WRITE_NO_STDIO

+// Encode the image as PNG in memory and write it to `filename`.

+// Returns 1 only when the whole buffer was written and the file closed

+// cleanly; returns 0 when encoding fails, the file cannot be opened, the

+// write comes up short, or fclose() reports an error.

+STBIWDEF int stbi_write_png(char const* filename, int x, int y, int comp, const void* data, int stride_bytes) {

+	FILE* f;

+	int len;

+	int ok;

+	unsigned char* png = stbi_write_png_to_mem((const unsigned char*)data, stride_bytes, x, y, comp, &len);

+	if (png == NULL)

+		return 0;

+	f = stbiw__fopen(filename, "wb");

+	if (!f) {

+		STBIW_FREE(png);

+		return 0;

+	}

+	// a short write (e.g. disk full) must not be reported as success

+	ok = fwrite(png, 1, len, f) == (size_t)len;

+	// buffered data is flushed by fclose(), so its result matters as well

+	if (fclose(f) != 0)

+		ok = 0;

+	STBIW_FREE(png);

+	return ok;

+}

+#endif

+// Encode the image as PNG in memory and pass the finished buffer to `func`

+// in a single call. Returns 0 when encoding fails and 1 otherwise.

+STBIWDEF int stbi_write_png_to_func(stbi_write_func* func, void* context, int x, int y, int comp, const void* data, int stride_bytes) {

+	int encoded_len;

+	unsigned char* encoded = stbi_write_png_to_mem((const unsigned char*)data, stride_bytes, x, y, comp, &encoded_len);

+	if (encoded != NULL) {

+		func(context, encoded, encoded_len);

+		STBIW_FREE(encoded);

+		return 1;

+	}

+	return 0;

+}

+/* ***************************************************************************

+ *

+ * JPEG writer

+ *

+ * This is based on Jon Olick's jo_jpeg.cpp:

+ * public domain Simple, Minimalistic JPEG writer -

+ * http://www.jonolick.com/code.html

+ */

+// Zigzag permutation of an 8x8 block: entry j is the zigzag position that

+// receives the coefficient at row-major index j (used as

+// DU[stbiw__jpg_ZigZag[j]] = ... in stbiw__jpg_processDU).

+static const unsigned char stbiw__jpg_ZigZag[] = {0,  1,  5,  6,  14, 15, 27, 28, 2,  4,  7,  13, 16, 26, 29, 42, 3,  8,  12, 17, 25, 30,

+												  41, 43, 9,  11, 18, 24, 31, 40, 44, 53, 10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38,

+												  46, 51, 55, 60, 21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63};

+// Append the code bs[0], which is bs[1] bits long, to the output bit stream.

+// *bitBufP is a 24-bit window whose pending bits are left-aligned at bit 23

+// and *bitCntP is the number of pending bits. Every completed byte is

+// written with stbiw__putc(), and a 0 byte is written after each 0xFF byte.

+// The window is handled as an unsigned value masked to 24 bits, so no shift

+// can overflow a signed int; the bytes emitted are unchanged because only

+// bits 16..23 are ever read.

+static void stbiw__jpg_writeBits(stbi__write_context* s, int* bitBufP, int* bitCntP, const unsigned short* bs) {

+	unsigned int bitBuf = (unsigned int)*bitBufP & 0xFFFFFFu;

+	int bitCnt = *bitCntP;

+	bitCnt += bs[1];

+	// place the new code directly below the bits that are already pending

+	bitBuf |= (unsigned int)bs[0] << (24 - bitCnt);

+	while (bitCnt >= 8) {

+		unsigned char c = (unsigned char)((bitBuf >> 16) & 255);

+		stbiw__putc(s, c);

+		if (c == 255) {

+			stbiw__putc(s, 0);

+		}

+		// drop the byte just written and keep the window within 24 bits

+		bitBuf = (bitBuf << 8) & 0xFFFFFFu;

+		bitCnt -= 8;

+	}

+	*bitBufP = (int)bitBuf;

+	*bitCntP = bitCnt;

+}

+// In-place 8-point forward DCT on the eight floats addressed by d0p..d7p.

+// All inputs are read before any output is stored.

+static void stbiw__jpg_DCT(float* d0p, float* d1p, float* d2p, float* d3p, float* d4p, float* d5p, float* d6p, float* d7p) {

+	float in0 = *d0p, in1 = *d1p, in2 = *d2p, in3 = *d3p;

+	float in4 = *d4p, in5 = *d5p, in6 = *d6p, in7 = *d7p;

+	// sums and differences of the mirrored input pairs

+	float sum07 = in0 + in7;

+	float dif07 = in0 - in7;

+	float sum16 = in1 + in6;

+	float dif16 = in1 - in6;

+	float sum25 = in2 + in5;

+	float dif25 = in2 - in5;

+	float sum34 = in3 + in4;

+	float dif34 = in3 - in4;

+	// Even part

+	float even10 = sum07 + sum34; // phase 2

+	float even13 = sum07 - sum34;

+	float even11 = sum16 + sum25;

+	float even12 = sum16 - sum25;

+	float out0 = even10 + even11; // phase 3

+	float out4 = even10 - even11;

+	float rot4 = (even12 + even13) * 0.707106781f; // c4

+	float out2 = even13 + rot4;					// phase 5

+	float out6 = even13 - rot4;

+	// Odd part

+	float odd10 = dif34 + dif25; // phase 2

+	float odd11 = dif25 + dif16;

+	float odd12 = dif16 + dif07;

+	// The rotator is modified from fig 4-8 to avoid extra negations.

+	float z5 = (odd10 - odd12) * 0.382683433f; // c6

+	float z2 = odd10 * 0.541196100f + z5;	  // c2-c6

+	float z4 = odd12 * 1.306562965f + z5;	  // c2+c6

+	float z3 = odd11 * 0.707106781f;		   // c4

+	float z11 = dif07 + z3; // phase 5

+	float z13 = dif07 - z3;

+	*d5p = z13 + z2; // phase 6

+	*d3p = z13 - z2;

+	*d1p = z11 + z4;

+	*d7p = z11 - z4;

+	*d0p = out0;

+	*d2p = out2;

+	*d4p = out4;

+	*d6p = out6;

+}

+// Compute the bit pattern used to code `val`: bits[1] receives the number of

+// bits needed for |val| (at least 1) and bits[0] the low bits[1] bits of

+// val, where a negative val is first decremented by one.

+static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) {

+	int magnitude = val;

+	int width = 1;

+	if (val < 0) {

+		magnitude = -val;

+		val = val - 1;

+	}

+	// one more bit for every further halving that leaves a non-zero value

+	for (magnitude >>= 1; magnitude != 0; magnitude >>= 1)

+		++width;

+	bits[1] = width;

+	bits[0] = val & ((1 << width) - 1);

+}

+// Transform, quantize and entropy-code one 8x8 block.

+// CDU holds the samples with `du_stride` floats between rows and is

+// overwritten in place by the row and column DCT passes. Each coefficient is

+// then multiplied by the matching fdtbl[] entry, rounded to an int and stored

+// in zigzag order. The DC value is coded as its difference to `DC` using

+// HTDC; the AC values are coded with HTAC, indexed by

+// (zero run << 4) + bit count. Returns this block's quantized DC value

+// (DU[0]).

+static int stbiw__jpg_processDU(stbi__write_context* s, int* bitBuf, int* bitCnt, float* CDU, int du_stride, float* fdtbl, int DC,

+								const unsigned short HTDC[256][2], const unsigned short HTAC[256][2]) {

+	// HTAC[0x00] is written as end-of-block, HTAC[0xF0] stands for 16 zeroes

+	const unsigned short EOB[2] = {HTAC[0x00][0], HTAC[0x00][1]};

+	const unsigned short M16zeroes[2] = {HTAC[0xF0][0], HTAC[0xF0][1]};

+	int dataOff, i, j, n, diff, end0pos, x, y;

+	int DU[64];

+	// DCT rows

+	for (dataOff = 0, n = du_stride * 8; dataOff < n; dataOff += du_stride) {

+		stbiw__jpg_DCT(&CDU[dataOff], &CDU[dataOff + 1], &CDU[dataOff + 2], &CDU[dataOff + 3], &CDU[dataOff + 4], &CDU[dataOff + 5], &CDU[dataOff + 6],

+					   &CDU[dataOff + 7]);

+	}

+	// DCT columns

+	for (dataOff = 0; dataOff < 8; ++dataOff) {

+		stbiw__jpg_DCT(&CDU[dataOff], &CDU[dataOff + du_stride], &CDU[dataOff + du_stride * 2], &CDU[dataOff + du_stride * 3], &CDU[dataOff + du_stride * 4],

+					   &CDU[dataOff + du_stride * 5], &CDU[dataOff + du_stride * 6], &CDU[dataOff + du_stride * 7]);

+	}

+	// Quantize/descale/zigzag the coefficients

+	for (y = 0, j = 0; y < 8; ++y) {

+		for (x = 0; x < 8; ++x, ++j) {

+			float v;

+			i = y * du_stride + x;

+			v = CDU[i] * fdtbl[j];

+			// DU[stbiw__jpg_ZigZag[j]] = (int)(v < 0 ? ceilf(v - 0.5f) :

+			// floorf(v + 0.5f)); ceilf() and floorf() are C99, not C89, but I

+			// /think/ they're not needed here anyway?

+			DU[stbiw__jpg_ZigZag[j]] = (int)(v < 0 ? v - 0.5f : v + 0.5f);

+		}

+	}

+	// Encode DC

+	diff = DU[0] - DC;

+	if (diff == 0) {

+		stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTDC[0]);

+	} else {

+		unsigned short bits[2];

+		stbiw__jpg_calcBits(diff, bits);

+		// the size category is written first, then the value bits themselves

+		stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTDC[bits[1]]);

+		stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits);

+	}

+	// Encode ACs

+	end0pos = 63;

+	for (; (end0pos > 0) && (DU[end0pos] == 0); --end0pos) {

+	}

+	// end0pos = first element in reverse order !=0

+	if (end0pos == 0) {

+		// every AC coefficient is zero: end-of-block only

+		stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB);

+		return DU[0];

+	}

+	for (i = 1; i <= end0pos; ++i) {

+		int startpos = i;

+		int nrzeroes;

+		unsigned short bits[2];

+		// skip the zero run; it stops at end0pos at the latest because DU[end0pos] != 0

+		for (; DU[i] == 0 && i <= end0pos; ++i) {

+		}

+		nrzeroes = i - startpos;

+		if (nrzeroes >= 16) {

+			// runs of 16 or more are split into 16-zero markers plus a remainder

+			int lng = nrzeroes >> 4;

+			int nrmarker;

+			for (nrmarker = 1; nrmarker <= lng; ++nrmarker)

+				stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes);

+			nrzeroes &= 15;

+		}

+		stbiw__jpg_calcBits(DU[i], bits);

+		stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes << 4) + bits[1]]);

+		stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits);

+	}

+	// no end-of-block marker is written when the last coefficient is non-zero

+	if (end0pos != 63) {

+		stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB);

+	}

+	return DU[0];

+}

+static int stbi_write_jpg_core(stbi__write_context* s, int width, int height, int comp, const void* data, int quality) {

+	// Constants that don't pollute global namespace

+	static const unsigned char std_dc_luminance_nrcodes[] = {0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0};

+	static const unsigned char std_dc_luminance_values[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};

+	static const unsigned char std_ac_luminance_nrcodes[] = {0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d};

+	static const unsigned char std_ac_luminance_values[] = {

+		0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,

+		0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0, 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,

+		0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,

+		0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,

+		0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,

+		0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,

+		0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa};

+	static const unsigned char std_dc_chrominance_nrcodes[] = {0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0};

+	static const unsigned char std_dc_chrominance_values[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};

+	static const unsigned char std_ac_chrominance_nrcodes[] = {0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77};

+	static const unsigned char std_ac_chrominance_values[] = {

+		0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71, 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,

+		0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, 0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34, 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,

+		0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,

+		0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,

+		0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,

+		0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,

+		0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa};

+	// Huffman tables

+	static const unsigned short YDC_HT[256][2] = {{0, 2}, {2, 3}, {3, 3}, {4, 3}, {5, 3}, {6, 3}, {14, 4}, {30, 5}, {62, 6}, {126, 7}, {254, 8}, {510, 9}};

+	static const unsigned short UVDC_HT[256][2] = {{0, 2},  {1, 2},   {2, 2},   {6, 3},   {14, 4},	{30, 5},

+												   {62, 6}, {126, 7}, {254, 8}, {510, 9}, {1022, 10}, {2046, 11}};

+	static const unsigned short YAC_HT[256][2] = {

+		{10, 4},	 {0, 2},	  {1, 2},	  {4, 3},		{11, 4},	 {26, 5},	 {120, 7},	{248, 8},	{1014, 10},  {65410, 16}, {65411, 16},

+		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {12, 4},	 {27, 5},		{121, 7},	{502, 9},	{2038, 11},

+		{65412, 16}, {65413, 16}, {65414, 16}, {65415, 16}, {65416, 16}, {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},

+		{28, 5},	 {249, 8},	{1015, 10},  {4084, 12},  {65417, 16}, {65418, 16}, {65419, 16}, {65420, 16}, {65421, 16}, {65422, 16}, {0, 0},

+		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {58, 6},	 {503, 9},	{4085, 12},  {65423, 16}, {65424, 16}, {65425, 16},

+		{65426, 16}, {65427, 16}, {65428, 16}, {65429, 16}, {0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {59, 6},

+		{1016, 10},  {65430, 16}, {65431, 16}, {65432, 16}, {65433, 16}, {65434, 16}, {65435, 16}, {65436, 16}, {65437, 16}, {0, 0},	  {0, 0},

+		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{122, 7},	{2039, 11},  {65438, 16}, {65439, 16}, {65440, 16}, {65441, 16}, {65442, 16},

+		{65443, 16}, {65444, 16}, {65445, 16}, {0, 0},		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {123, 7},	{4086, 12},

+		{65446, 16}, {65447, 16}, {65448, 16}, {65449, 16}, {65450, 16}, {65451, 16}, {65452, 16}, {65453, 16}, {0, 0},		 {0, 0},	  {0, 0},

+		{0, 0},		 {0, 0},	  {0, 0},	  {250, 8},	{4087, 12},  {65454, 16}, {65455, 16}, {65456, 16}, {65457, 16}, {65458, 16}, {65459, 16},

+		{65460, 16}, {65461, 16}, {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{504, 9},	{32704, 15}, {65462, 16},

+		{65463, 16}, {65464, 16}, {65465, 16}, {65466, 16}, {65467, 16}, {65468, 16}, {65469, 16}, {0, 0},		{0, 0},		 {0, 0},	  {0, 0},

+		{0, 0},		 {0, 0},	  {505, 9},	{65470, 16}, {65471, 16}, {65472, 16}, {65473, 16}, {65474, 16}, {65475, 16}, {65476, 16}, {65477, 16},

+		{65478, 16}, {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},	  {506, 9},	{65479, 16}, {65480, 16}, {65481, 16},

+		{65482, 16}, {65483, 16}, {65484, 16}, {65485, 16}, {65486, 16}, {65487, 16}, {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},

+		{0, 0},		 {1017, 10},  {65488, 16}, {65489, 16}, {65490, 16}, {65491, 16}, {65492, 16}, {65493, 16}, {65494, 16}, {65495, 16}, {65496, 16},

+		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {1018, 10},  {65497, 16}, {65498, 16}, {65499, 16}, {65500, 16},

+		{65501, 16}, {65502, 16}, {65503, 16}, {65504, 16}, {65505, 16}, {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},

+		{2040, 11},  {65506, 16}, {65507, 16}, {65508, 16}, {65509, 16}, {65510, 16}, {65511, 16}, {65512, 16}, {65513, 16}, {65514, 16}, {0, 0},

+		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {65515, 16}, {65516, 16}, {65517, 16}, {65518, 16}, {65519, 16}, {65520, 16},

+		{65521, 16}, {65522, 16}, {65523, 16}, {65524, 16}, {0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {2041, 11},  {65525, 16},

+		{65526, 16}, {65527, 16}, {65528, 16}, {65529, 16}, {65530, 16}, {65531, 16}, {65532, 16}, {65533, 16}, {65534, 16}, {0, 0},	  {0, 0},

+		{0, 0},		 {0, 0},	  {0, 0}};

+	static const unsigned short UVAC_HT[256][2] = {

+		{0, 2},		 {1, 2},	  {4, 3},	  {10, 4},		{24, 5},	 {25, 5},	 {56, 6},	 {120, 7},	{500, 9},	{1014, 10},  {4084, 12},

+		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {11, 4},	 {57, 6},		{246, 8},	{501, 9},	{2038, 11},

+		{4085, 12},  {65416, 16}, {65417, 16}, {65418, 16}, {65419, 16}, {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},

+		{26, 5},	 {247, 8},	{1015, 10},  {4086, 12},  {32706, 15}, {65420, 16}, {65421, 16}, {65422, 16}, {65423, 16}, {65424, 16}, {0, 0},

+		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {27, 5},	 {248, 8},	{1016, 10},  {4087, 12},  {65425, 16}, {65426, 16},

+		{65427, 16}, {65428, 16}, {65429, 16}, {65430, 16}, {0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {58, 6},

+		{502, 9},	{65431, 16}, {65432, 16}, {65433, 16}, {65434, 16}, {65435, 16}, {65436, 16}, {65437, 16}, {65438, 16}, {0, 0},	  {0, 0},

+		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{59, 6},	 {1017, 10},  {65439, 16}, {65440, 16}, {65441, 16}, {65442, 16}, {65443, 16},

+		{65444, 16}, {65445, 16}, {65446, 16}, {0, 0},		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {121, 7},	{2039, 11},

+		{65447, 16}, {65448, 16}, {65449, 16}, {65450, 16}, {65451, 16}, {65452, 16}, {65453, 16}, {65454, 16}, {0, 0},		 {0, 0},	  {0, 0},

+		{0, 0},		 {0, 0},	  {0, 0},	  {122, 7},	{2040, 11},  {65455, 16}, {65456, 16}, {65457, 16}, {65458, 16}, {65459, 16}, {65460, 16},

+		{65461, 16}, {65462, 16}, {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{249, 8},	{65463, 16}, {65464, 16},

+		{65465, 16}, {65466, 16}, {65467, 16}, {65468, 16}, {65469, 16}, {65470, 16}, {65471, 16}, {0, 0},		{0, 0},		 {0, 0},	  {0, 0},

+		{0, 0},		 {0, 0},	  {503, 9},	{65472, 16}, {65473, 16}, {65474, 16}, {65475, 16}, {65476, 16}, {65477, 16}, {65478, 16}, {65479, 16},

+		{65480, 16}, {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},	  {504, 9},	{65481, 16}, {65482, 16}, {65483, 16},

+		{65484, 16}, {65485, 16}, {65486, 16}, {65487, 16}, {65488, 16}, {65489, 16}, {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},

+		{0, 0},		 {505, 9},	{65490, 16}, {65491, 16}, {65492, 16}, {65493, 16}, {65494, 16}, {65495, 16}, {65496, 16}, {65497, 16}, {65498, 16},

+		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {506, 9},	{65499, 16}, {65500, 16}, {65501, 16}, {65502, 16},

+		{65503, 16}, {65504, 16}, {65505, 16}, {65506, 16}, {65507, 16}, {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {0, 0},	  {0, 0},

+		{2041, 11},  {65508, 16}, {65509, 16}, {65510, 16}, {65511, 16}, {65512, 16}, {65513, 16}, {65514, 16}, {65515, 16}, {65516, 16}, {0, 0},

+		{0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {16352, 14}, {65517, 16}, {65518, 16}, {65519, 16}, {65520, 16}, {65521, 16},

+		{65522, 16}, {65523, 16}, {65524, 16}, {65525, 16}, {0, 0},		 {0, 0},	  {0, 0},	  {0, 0},		{0, 0},		 {1018, 10},  {32707, 15},

+		{65526, 16}, {65527, 16}, {65528, 16}, {65529, 16}, {65530, 16}, {65531, 16}, {65532, 16}, {65533, 16}, {65534, 16}, {0, 0},	  {0, 0},

+		{0, 0},		 {0, 0},	  {0, 0}};

+	static const int YQT[] = {16, 11,  10,  16, 24, 40, 51, 61, 12,  12,  14,  19,  26, 58, 60, 55,  14,  13,  16,  24, 40, 57,

+							  69, 56,  14,  17, 22, 29, 51, 87, 80,  62,  18,  22,  37, 56, 68, 109, 103, 77,  24,  35, 55, 64,

+							  81, 104, 113, 92, 49, 64, 78, 87, 103, 121, 120, 101, 72, 92, 95, 98,  112, 100, 103, 99};

+	static const int UVQT[] = {17, 18, 24, 47, 99, 99, 99, 99, 18, 21, 26, 66, 99, 99, 99, 99, 24, 26, 56, 99, 99, 99, 99, 99, 47, 66, 99, 99, 99, 99, 99, 99,

+							   99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99};

+	static const float aasf[] = {1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f,

+								 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f};

+	int row, col, i, k, subsample;

+	float fdtbl_Y[64], fdtbl_UV[64];

+	unsigned char YTable[64], UVTable[64];

+	if (!data || !width || !height || comp > 4 || comp < 1) {

+		return 0;

+	}

+	quality = quality ? quality : 90;

+	subsample = quality <= 90 ? 1 : 0;

+	quality = quality < 1 ? 1 : quality > 100 ? 100 : quality;

+	quality = quality < 50 ? 5000 / quality : 200 - quality * 2;

+	for (i = 0; i < 64; ++i) {

+		int uvti, yti = (YQT[i] * quality + 50) / 100;

+		YTable[stbiw__jpg_ZigZag[i]] = (unsigned char)(yti < 1 ? 1 : yti > 255 ? 255 : yti);

+		uvti = (UVQT[i] * quality + 50) / 100;

+		UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char)(uvti < 1 ? 1 : uvti > 255 ? 255 : uvti);

+	}

+	for (row = 0, k = 0; row < 8; ++row) {

+		for (col = 0; col < 8; ++col, ++k) {

+			fdtbl_Y[k] = 1 / (YTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]);

+			fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]);

+		}

+	}

+	// Write Headers

+	{

+		static const unsigned char head0[] = {0xFF, 0xD8, 0xFF, 0xE0, 0, 0x10, 'J', 'F', 'I', 'F', 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0xFF, 0xDB, 0, 0x84, 0};

+		static const unsigned char head2[] = {0xFF, 0xDA, 0, 0xC, 3, 1, 0, 2, 0x11, 3, 0x11, 0, 0x3F, 0};

+		const unsigned char head1[] = {0xFF,

+									   0xC0,

+									   0,

+									   0x11,

+									   8,

+									   (unsigned char)(height >> 8),

+									   STBIW_UCHAR(height),

+									   (unsigned char)(width >> 8),

+									   STBIW_UCHAR(width),

+									   3,

+									   1,

+									   (unsigned char)(subsample ? 0x22 : 0x11),

+									   0,

+									   2,

+									   0x11,

+									   1,

+									   3,

+									   0x11,

+									   1,

+									   0xFF,

+									   0xC4,

+									   0x01,

+									   0xA2,

+									   0};

+		s->func(s->context, (void*)head0, sizeof(head0));

+		s->func(s->context, (void*)YTable, sizeof(YTable));

+		stbiw__putc(s, 1);

+		s->func(s->context, UVTable, sizeof(UVTable));

+		s->func(s->context, (void*)head1, sizeof(head1));

+		s->func(s->context, (void*)(std_dc_luminance_nrcodes + 1), sizeof(std_dc_luminance_nrcodes) - 1);

+		s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values));

+		stbiw__putc(s, 0x10); // HTYACinfo

+		s->func(s->context, (void*)(std_ac_luminance_nrcodes + 1), sizeof(std_ac_luminance_nrcodes) - 1);

+		s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values));

+		stbiw__putc(s, 1); // HTUDCinfo

+		s->func(s->context, (void*)(std_dc_chrominance_nrcodes + 1), sizeof(std_dc_chrominance_nrcodes) - 1);

+		s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values));

+		stbiw__putc(s, 0x11); // HTUACinfo

+		s->func(s->context, (void*)(std_ac_chrominance_nrcodes + 1), sizeof(std_ac_chrominance_nrcodes) - 1);

+		s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values));

+		s->func(s->context, (void*)head2, sizeof(head2));

+	}

+	// Encode 8x8 macroblocks

+	{

+		static const unsigned short fillBits[] = {0x7F, 7};

+		int DCY = 0, DCU = 0, DCV = 0;

+		int bitBuf = 0, bitCnt = 0;

+		// comp == 2 is grey+alpha (alpha is ignored)

+		int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0;

+		const unsigned char* dataR = (const unsigned char*)data;

+		const unsigned char* dataG = dataR + ofsG;

+		const unsigned char* dataB = dataR + ofsB;

+		int x, y, pos;

+		if (subsample) {

+			for (y = 0; y < height; y += 16) {

+				for (x = 0; x < width; x += 16) {

+					float Y[256], U[256], V[256];

+					for (row = y, pos = 0; row < y + 16; ++row) {

+						// row >= height => use last input row

+						int clamped_row = (row < height) ? row : height - 1;

+						int base_p = (stbi__flip_vertically_on_write ? (height - 1 - clamped_row) : clamped_row) * width * comp;

+						for (col = x; col < x + 16; ++col, ++pos) {

+							// if col >= width => use pixel from last input

+							// column

+							int p = base_p + ((col < width) ? col : (width - 1)) * comp;

+							float r = dataR[p], g = dataG[p], b = dataB[p];

+							Y[pos] = +0.29900f * r + 0.58700f * g + 0.11400f * b - 128;

+							U[pos] = -0.16874f * r - 0.33126f * g + 0.50000f * b;

+							V[pos] = +0.50000f * r - 0.41869f * g - 0.08131f * b;

+						}

+					}

+					DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y + 0, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT);

+					DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y + 8, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT);

+					DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y + 128, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT);

+					DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y + 136, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT);

+					// subsample U,V

+					{

+						float subU[64], subV[64];

+						int yy, xx;

+						for (yy = 0, pos = 0; yy < 8; ++yy) {

+							for (xx = 0; xx < 8; ++xx, ++pos) {

+								int j = yy * 32 + xx * 2;

+								subU[pos] = (U[j + 0] + U[j + 1] + U[j + 16] + U[j + 17]) * 0.25f;

+								subV[pos] = (V[j + 0] + V[j + 1] + V[j + 16] + V[j + 17]) * 0.25f;

+							}

+						}

+						DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, subU, 8, fdtbl_UV, DCU, UVDC_HT, UVAC_HT);

+						DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, subV, 8, fdtbl_UV, DCV, UVDC_HT, UVAC_HT);

+					}

+				}

+			}

+		} else {

+			for (y = 0; y < height; y += 8) {

+				for (x = 0; x < width; x += 8) {

+					float Y[64], U[64], V[64];

+					for (row = y, pos = 0; row < y + 8; ++row) {

+						// row >= height => use last input row

+						int clamped_row = (row < height) ? row : height - 1;

+						int base_p = (stbi__flip_vertically_on_write ? (height - 1 - clamped_row) : clamped_row) * width * comp;

+						for (col = x; col < x + 8; ++col, ++pos) {

+							// if col >= width => use pixel from last input

+							// column

+							int p = base_p + ((col < width) ? col : (width - 1)) * comp;

+							float r = dataR[p], g = dataG[p], b = dataB[p];

+							Y[pos] = +0.29900f * r + 0.58700f * g + 0.11400f * b - 128;

+							U[pos] = -0.16874f * r - 0.33126f * g + 0.50000f * b;

+							V[pos] = +0.50000f * r - 0.41869f * g - 0.08131f * b;

+						}

+					}

+					DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y, 8, fdtbl_Y, DCY, YDC_HT, YAC_HT);

+					DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, U, 8, fdtbl_UV, DCU, UVDC_HT, UVAC_HT);

+					DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, V, 8, fdtbl_UV, DCV, UVDC_HT, UVAC_HT);

+				}

+			}

+		}

+		// Do the bit alignment of the EOI marker

+		stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits);

+	}

+	// EOI

+	stbiw__putc(s, 0xFF);

+	stbiw__putc(s, 0xD9);

+	return 1;

+}

+// Encode an image as JPEG and deliver the bytes through a caller-supplied callback.

+// func, context: output callback and the opaque pointer registered alongside it.

+// x, y, comp, data, quality: forwarded unchanged to stbi_write_jpg_core.

+// Returns whatever stbi_write_jpg_core returns.

+STBIWDEF int stbi_write_jpg_to_func(stbi_write_func* func, void* context, int x, int y, int comp, const void* data, int quality) {

+	stbi__write_context s;

+	stbi__start_write_callbacks(&s, func, context);

+	// data keeps its const qualifier, exactly as stbi_write_jpg hands it to the same core routine.

+	return stbi_write_jpg_core(&s, x, y, comp, data, quality);

+}

+#ifndef STBI_WRITE_NO_STDIO

+// Encode an image as JPEG into the file named by filename.

+// Returns 0 when stbi__start_write_file reports failure; otherwise returns the

+// result of stbi_write_jpg_core after the file has been closed again.

+STBIWDEF int stbi_write_jpg(char const* filename, int x, int y, int comp, const void* data, int quality) {

+	stbi__write_context s;

+	int result;

+	if (!stbi__start_write_file(&s, filename)) {

+		return 0;

+	}

+	result = stbi_write_jpg_core(&s, x, y, comp, data, quality);

+	stbi__end_write_file(&s);

+	return result;

+}

+#endif

+#endif // STB_IMAGE_WRITE_IMPLEMENTATION

+/* Revision history

+	  1.14  (2020-02-02) updated JPEG writer to downsample chroma channels

+	  1.13

+	  1.12

+	  1.11  (2019-08-11)

+	  1.10  (2019-02-07)

+			 support utf8 filenames in Windows; fix warnings and platform ifdefs

+	  1.09  (2018-02-11)

+			 fix typo in zlib quality API, improve STB_I_W_STATIC in C++

+	  1.08  (2018-01-29)

+			 add stbi__flip_vertically_on_write, external zlib, zlib quality,

+			 choose PNG filter

+	  1.07  (2017-07-24)

+			 doc fix

+	  1.06 (2017-07-23)

+			 writing JPEG (using Jon Olick's code)

+	  1.05   ???

+	  1.04 (2017-03-03)

+			 monochrome BMP expansion

+	  1.03   ???

+	  1.02 (2016-04-02)

+			 avoid allocating large structures on the stack

+	  1.01 (2016-01-16)

+			 STBIW_REALLOC_SIZED: support allocators with no realloc support

+			 avoid race-condition in crc initialization

+			 minor compile issues

+	  1.00 (2015-09-14)

+			 installable file IO function

+	  0.99 (2015-09-13)

+			 warning fixes; TGA rle support

+	  0.98 (2015-04-08)

+			 added STBIW_MALLOC, STBIW_ASSERT etc

+	  0.97 (2015-01-18)

+			 fixed HDR asserts, rewrote HDR rle logic

+	  0.96 (2015-01-17)

+			 add HDR output

+			 fix monochrome BMP

+	  0.95 (2014-08-17)

+					   add monochrome TGA output

+	  0.94 (2014-05-31)

+			 rename private functions to avoid conflicts with stb_image.h

+	  0.93 (2014-05-27)

+			 warning fixes

+	  0.92 (2010-08-01)

+			 casts to unsigned char to fix warnings

+	  0.91 (2010-07-17)

+			 first public release

+	  0.90   first internal release

+*/

+/*

+------------------------------------------------------------------------------

+This software is available under 2 licenses -- choose whichever you prefer.

+------------------------------------------------------------------------------

+ALTERNATIVE A - MIT License

+Copyright (c) 2017 Sean Barrett

+Permission is hereby granted, free of charge, to any person obtaining a copy of

+this software and associated documentation files (the "Software"), to deal in

+the Software without restriction, including without limitation the rights to

+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies

+of the Software, and to permit persons to whom the Software is furnished to do

+so, subject to the following conditions:

+The above copyright notice and this permission notice shall be included in all

+copies or substantial portions of the Software.

+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

+SOFTWARE.

+------------------------------------------------------------------------------

+ALTERNATIVE B - Public Domain (www.unlicense.org)

+This is free and unencumbered software released into the public domain.

+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this

+software, either in source code form or as a compiled binary, for any purpose,

+commercial or non-commercial, and by any means.

+In jurisdictions that recognize copyright laws, the author or authors of this

+software dedicate any and all copyright interest in the software to the public

+domain. We make this dedication for the benefit of the public at large and to

+the detriment of our heirs and successors. We intend this dedication to be an

+overt act of relinquishment in perpetuity of all present and future rights to

+this software under copyright law.

+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN

+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION

+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

+------------------------------------------------------------------------------

+*/

--- /dev/null

+++ b/include-demo/stringutil.h

@@ -1,0 +1,330 @@

+#include <string.h>

+#include <stdlib.h>

+#include <stdio.h>

+#include <ctype.h>

+//Before we get on, "stringutil.h" is the most C-ish name for a source code file ever, amirite?

+//Allocator hooks. Define STRUTIL_ALLOC, STRUTIL_FREE and STRUTIL_REALLOC before

+//including this header to substitute another allocator; each default below

+//forwards to the matching C library call.

+#ifndef STRUTIL_ALLOC

+#define STRUTIL_ALLOC(s) malloc(s)

+#endif

+#ifndef STRUTIL_FREE

+#define STRUTIL_FREE(s) free(s)

+#endif

+#ifndef STRUTIL_REALLOC

+#define STRUTIL_REALLOC(s, t) realloc(s,t)

+#endif

+//Short aliases for the strcatalloc family.

+//Define STRUTIL_NO_SHORT_NAMES before including this header to leave them out.

+#ifndef STRUTIL_NO_SHORT_NAMES

+#define strcata strcatalloc

+#define strcataf1 strcatallocf1

+#define strcataf2 strcatallocf2

+#define strcatafb strcatallocfb

+#endif

+//Concatenate s1 and s2 into a newly allocated string.

+//Neither argument is modified or freed.

+//Returns NULL when STRUTIL_ALLOC fails; otherwise the caller owns the result.

+static inline char* strcatalloc(const char* s1, const char* s2){

+	const size_t len1 = strlen(s1);

+	const size_t len2 = strlen(s2);

+	char* joined = STRUTIL_ALLOC(len1 + len2 + 1);

+	if(!joined) return NULL;

+	memcpy(joined, s1, len1);

+	memcpy(joined + len1, s2, len2 + 1); //the +1 carries s2's terminator across

+	return joined;

+}

+//Append s2 to the heap string s1, resizing s1 with STRUTIL_REALLOC.

+//s1 is always consumed: on success the returned pointer replaces it, and on

+//failure it is released before NULL is returned, so a chain of the form

+//`out = strcatallocf1(out, x)` does not leak when memory runs out.

+//A NULL s1 (for example the result of an earlier failed call in such a chain)

+//yields NULL instead of being dereferenced.

+//s2 is never modified or freed.

+static inline char* strcatallocf1(char* s1, const char* s2){

+	size_t len1, len2;

+	char* grown;

+	if(!s1) return NULL;

+	len1 = strlen(s1);

+	len2 = strlen(s2);

+	grown = STRUTIL_REALLOC(s1, len1 + len2 + 1);

+	if(!grown){

+		STRUTIL_FREE(s1); //a failed realloc leaves s1 allocated; release it as the contract promises

+		return NULL;

+	}

+	memcpy(grown + len1, s2, len2 + 1); //the +1 carries s2's terminator across

+	return grown;

+}

+//Build a new string holding s1 followed by s2, then release s2.

+//s2 is freed whether or not the allocation succeeds; s1 is left alone.

+//Returns NULL when STRUTIL_ALLOC fails.

+static inline char* strcatallocf2(const char* s1, char* s2){

+	const size_t len1 = strlen(s1);

+	const size_t len2 = strlen(s2);

+	char* joined = STRUTIL_ALLOC(len1 + len2 + 1);

+	if(joined){

+		memcpy(joined, s1, len1);

+		memcpy(joined + len1, s2, len2 + 1); //the +1 carries s2's terminator across

+	}

+	STRUTIL_FREE(s2);

+	return joined;

+}

+//Build a new string holding s1 followed by s2, then release both inputs.

+//s1 and s2 are freed whether or not the allocation succeeds.

+//Returns NULL when STRUTIL_ALLOC fails.

+static inline char* strcatallocfb(char* s1, char* s2){

+	const size_t len1 = strlen(s1);

+	const size_t len2 = strlen(s2);

+	char* joined = STRUTIL_ALLOC(len1 + len2 + 1);

+	if(joined){

+		memcpy(joined, s1, len1);

+		memcpy(joined + len1, s2, len2 + 1); //the +1 carries s2's terminator across

+	}

+	STRUTIL_FREE(s1);

+	STRUTIL_FREE(s2);

+	return joined;

+}

+//Copy the first len bytes of in (which need not be NUL-terminated) into a new

+//NUL-terminated heap string.

+//Returns NULL when the allocation fails.

+//The buffer comes from STRUTIL_ALLOC, so it pairs with STRUTIL_FREE like the

+//other allocations in this header.

+static inline char* str_null_terminated_alloc(const char* in, unsigned int len){

+	//Widen before adding 1: where size_t is wider than unsigned int this keeps

+	//len == UINT_MAX from wrapping the requested size round to 0.

+	char* d = STRUTIL_ALLOC((size_t)len + 1);

+	if(d){

+		memcpy(d,in,len);

+		d[len] = '\0';

+	}

+	return d;

+}

+//Report whether str begins with pre: 1 if it does, 0 otherwise.

+//An empty pre is a prefix of every string.

+static inline unsigned int strprefix(const char *pre, const char *str)

+{

+    const size_t prefix_len = strlen(pre);

+    if(strlen(str) < prefix_len)

+        return 0; //str is too short to contain pre

+    return memcmp(pre, str, prefix_len) == 0;

+}

+//Locate the first occurrence of subtext inside text.

+//Returns the zero-based offset of the match, or -1 when subtext does not occur.

+//An empty subtext matches at offset 0.

+//The search is delegated to strstr so that overlapping partial matches are

+//handled correctly (for example "ab" is found at offset 1 of "aab").

+static inline long long strfind(const char* text, const char* subtext){

+	const char* hit = strstr(text, subtext);

+	if(!hit) return -1;

+	return (long long)(hit - text);

+}

+//Read characters from f into buf until terminator or end of file is met, or

+//until buflen-1 characters have been stored.

+//The terminator is consumed from the stream but not stored.

+//buf is NUL-terminated directly after the last stored character; when buflen

+//is 0 nothing is read or written.

+//Returns the number of characters stored, not counting the NUL.

+static inline unsigned long long read_until_terminator(FILE* f, char* buf, const unsigned long long buflen, char terminator){

+	unsigned long long i = 0;

+	int ch; //int rather than char, so that EOF stays distinguishable from data bytes

+	if(buflen == 0) return 0; //no room even for the NUL, and buflen-1 would wrap round

+	while(i < (buflen-1)){

+		ch = fgetc(f);

+		if(ch == EOF) break; //end of file or read error: stop without storing anything

+		if((char)ch == terminator) break;

+		buf[i++] = (char)ch;

+	}

+	buf[i] = '\0'; //terminate right after the data, so the result is a valid string even in an unzeroed buffer

+	buf[buflen-1] = '\0'; //kept for callers that rely on the final byte being cleared

+	return i;

+}

+//Read from f until terminator or end of file, growing a heap buffer as needed.

+//initsize is the starting capacity in bytes; 0 is treated as 1 so that there is

+//always room for the NUL.

+//The terminator is consumed from the stream but not stored.

+//On success returns a NUL-terminated buffer (release it with STRUTIL_FREE) and

+//stores its length, not counting the NUL, in *lenout.

+//Returns NULL, with *lenout set to 0, when memory cannot be obtained.

+static inline char* read_until_terminator_alloced(FILE* f, unsigned long long* lenout, char terminator, unsigned long long initsize){

+	int ch; //int rather than char, so that EOF is never stored as a data byte

+	unsigned long long bcap = initsize ? initsize : 1;

+	unsigned long long blen = 0;

+	char* buf = STRUTIL_ALLOC(bcap);

+	*lenout = 0;

+	if(!buf) return NULL;

+	for(;;){

+		ch = fgetc(f);

+		if(ch == EOF || (char)ch == terminator) {break;}

+		if(blen == (bcap-1))	//Only the slot reserved for the NUL is left: double the capacity.

+			{

+				char* grown;

+				bcap<<=1;

+				grown = STRUTIL_REALLOC(buf, bcap);

+				if(!grown){STRUTIL_FREE(buf); return NULL;}

+				buf = grown;

+			}

+		buf[blen++] = (char)ch;

+	}

+	buf[blen] = '\0'; //READ_UNTIL_TERMINATOR ALWAYS RETURNS A VALID STRING!

+	*lenout = blen;

+	return buf;

+}

+//Load the whole of f, starting from its beginning, into a newly allocated buffer.

+//A NUL byte is appended after the data so that text can be used as a C string.

+//*len receives the number of bytes actually read, not counting that NUL.

+//Returns NULL without touching *len when f is NULL.

+//Returns NULL with *len set to 0 when seeking or ftell fails, or when the

+//allocation fails.

+//Release the buffer with STRUTIL_FREE.

+static inline void* read_file_into_alloced_buffer(FILE* f, unsigned long long* len){

+	char* buf = NULL;

+	long size;

+	size_t got;

+	if(!f) return NULL;

+	*len = 0;

+	if(fseek(f, 0, SEEK_END) != 0) return NULL;

+	size = ftell(f);

+	if(size < 0) return NULL; //ftell signals failure with -1, which must not turn into a huge unsigned length

+	if(fseek(f,0,SEEK_SET) != 0) return NULL;

+	buf = STRUTIL_ALLOC((size_t)size + 1);

+	if(!buf) return NULL;

+	got = fread(buf, 1, (size_t)size, f);

+	buf[got] = '\0'; //terminate after what was really read, so a short read exposes no uninitialised bytes

+	*len = got;

+	return buf;

+}

+//GEK'S SIMPLE TEXT COMPRESSION SCHEMA

+/*LIMITATIONS

+* Token names must be alphabetic (a-z, A-Z)

+* The token mark must be escaped with a backslash.

+* Token names which are substrings of other ones must be listed later

+*/

+//Encode inbuf by replacing runs of known token definitions with short references.

+//tokens holds ntokens pairs: tokens[2*k] is a token's name, tokens[2*k+1] its definition.

+//esc and tokmark are the escape character and the token mark used in the output.

+//Output layout as written below: esc, tokmark, then for every token its name,

+//the decimal length of its definition, tokmark and the definition; then esc;

+//then the encoded body.

+//Returns the heap string built through strcatalloc / strcataf1 (strcataf1 is

+//the short alias of strcatallocf1).

+//NOTE(review): the results of strcatalloc and strcataf1 are not checked for NULL here.

+static inline char* strencodealloc(const char* inbuf, const char** tokens, unsigned long long ntokens, char esc, char tokmark){

+	unsigned long long lenin = strlen(inbuf);

+	char c; unsigned long long i = 0; //NOTE(review): c is never used in this function

+	char c_str[512] = {0}; //We are going to be sprintf-ing to this buffer.

+	char* out = NULL;

+	//The output starts with the two control characters so the decoder can recover them.

+	c_str[0] = esc;

+	c_str[1] = tokmark;

+	out = strcatalloc(c_str, "");

+	c_str[0] = 0;

+	c_str[1] = 0;

+	//Write out all the token entries. format is namelength~definition

+	for(unsigned long long j = 0; j < ntokens; j++){

+		out = strcataf1(out, tokens[2*j]);

+		//Write out the length of the token.

+		snprintf(c_str, 512, "%llu", (unsigned long long)strlen(tokens[2*j+1]));

+		out = strcataf1(out, c_str);

+		c_str[0] = tokmark;

+		c_str[1] = 0;

+		out = strcataf1(out, c_str);

+		out = strcataf1(out, tokens[2*j+1]);

+	}

+	//A lone escape character closes the token table.

+	c_str[0] = esc;

+	c_str[1] = 0;

+	out = strcataf1(out, c_str);

+	//We have now created the header. Now to begin encoding the text.

+	for(i=0; i<lenin; i++){

+		for(unsigned long long t = 0; t < ntokens; t++) //t- the token we are processing.

+			if(strprefix(tokens[t*2+1], inbuf+i)){ //Matched at least one

+				unsigned long long howmany = 1;

+				unsigned long long curtoklen = strlen(tokens[t*2+1]); //Length of the current token we are counting

+				//Count how many further copies of the definition follow back to back.

+				//NOTE(review): with an empty definition curtoklen is 0 and this loop's condition never changes.

+				for(unsigned long long h=1;i+h*curtoklen < lenin;h++){

+					if(strprefix(tokens[t*2+1], inbuf+i+h*curtoklen))

+						{howmany++;}

+					else

+						break; //The number of these things is limited.

+				}

+				//We know what token and how many, write it to out

+				c_str[0] = tokmark;

+				c_str[1] = 0;

+				out = strcataf1(out, c_str);

+				//The repeat count is written only when it exceeds 1.

+				if(howmany > 1){

+					snprintf(c_str, 512, "%llu", (unsigned long long)howmany);

+					out = strcataf1(out, c_str);

+				}

+				out = strcataf1(out, tokens[t*2]);

+				i+=howmany*curtoklen;

+				//NOTE(review): this continue belongs to the inner token loop, not the outer

+				//character loop: later tokens are still tried at the new position, and the

+				//character at the new i is then written out literally below.

+				continue;

+			}

+		//Test if we need to escape a sequence.

+		if(inbuf[i] == esc || inbuf[i] == tokmark){

+			c_str[0] = esc;

+			c_str[1] = 0;

+			out = strcataf1(out, c_str);

+		}

+		//We were unable to find a match, just write the character out.

+		c_str[0] = inbuf[i];

+		c_str[1] = 0;

+		out = strcataf1(out, c_str);

+	}

+	return out;

+}

+//Decode text laid out the way strencodealloc writes it.

+//Layout read here: inbuf[0] is the escape character and inbuf[1] the token mark;

+//then zero or more table entries (alphabetic name, decimal definition length,

+//one separator character, that many definition characters); then the escape

+//character; then the encoded body.

+//In the body an escape character makes the following character literal, and a

+//token mark followed by an optional decimal count and a token name expands to

+//that many copies of the token's definition.

+//Returns NULL when inbuf is shorter than 3 characters. Otherwise returns a heap

+//string holding whatever had been decoded when the input ended or a malformed

+//table entry was met.

+//NOTE(review): the results of STRUTIL_REALLOC, strcatalloc and strcatallocf1 are not checked here.

+static inline char* strdecodealloc(char* inbuf){

+	unsigned long long lenin = strlen(inbuf);

+	if(lenin < 3) {

+		//puts("\nToo Short!\n");

+		return NULL;

+	}

+	char esc = inbuf[0]; //The escape character is the first one.

+	char tokmark = inbuf[1]; //Begin token character.

+	//printf("Escape is %c, tokmark is %c\n", esc, tokmark);

+	char c; unsigned long long i = 2;

+	char c_str[2] = {0,0}; //one-character scratch string used for appending single characters

+	//Our decoded text.

+	char* out = strcatalloc("","");

+	//Tokens for replacement, even is the token,

+	//odd is its definition

+	char** tokens = NULL;

+	//unsigned long long* toklens = NULL;

+	unsigned long long ntokens = 0;

+	//The statement {if(i <= lenin) c = inbuf[i++]; else {goto end;}} recurs throughout:

+	//it fetches the next input character into c (the terminating NUL included), or

+	//jumps to the cleanup label once i has moved past it.

+	//Retrieve the tokens.

+	{if(i <= lenin) c = inbuf[i++]; else {goto end;}}; //has to occur before the loop.

+	while(c != esc){	ntokens++;

+		tokens = STRUTIL_REALLOC(tokens, ntokens * 2 * sizeof(char*));

+		//toklens = STRUTIL_REALLOC(toklens, ntokens * sizeof(unsigned long long));

+		//toklens[ntokens-1] = 0;

+		tokens[(ntokens-1)*2] = strcatalloc("","");

+		tokens[(ntokens-1)*2+1] = strcatalloc("","");

+		//name of token is tokens[(ntokens-1)*2] and its definition is tokens[(ntokens-1)*2+1]

+		//Get the name of the token.

+		if(!isalpha(c)) goto end;	//Error: a table entry must begin with an alphabetic name.

+		while(isalpha(c)){

+			c_str[0] = c;

+			tokens[(ntokens-1)*2] = strcatallocf1(tokens[(ntokens-1)*2], c_str);

+			{if(i <= lenin) c = inbuf[i++]; else {goto end;}};

+		}

+		//The last retrieve() got us the first digit of the token length.

+		//Get the length of the token

+		unsigned long long l = 0;

+		if(!isdigit(c)) goto end;

+		while(isdigit(c) && c!=tokmark){

+			c_str[0] = c;

+			l *= 10;

+			l += atoi(c_str);

+			{if(i <= lenin) c = inbuf[i++]; else {goto end;}};

+		}

+		//toklens[ntokens-1] = l;

+		//We have the name of the token and its length, the last {if(i <= lenin) c = inbuf[i++]; else {goto end;}} got us the token character (~ in my example)

+		//NOTE(review): that character is not compared against tokmark before the definition is read.

+		//Now we can grab the token definition.

+		for(unsigned long long vv = 0; vv < l; vv++){

+			{if(i <= lenin) c = inbuf[i++]; else {goto end;}};

+			c_str[0] = c;

+			tokens[(ntokens-1)*2+1] = strcatallocf1(tokens[(ntokens-1)*2+1], c_str);

+		}

+		{if(i <= lenin) c = inbuf[i++]; else {goto end;}};

+	//	printf("\nTOKEN %s IS %s, length %llu",tokens[(ntokens-1)*2] ,tokens[(ntokens-1)*2+1], l);

+	}

+	//puts("\nREACHED ESCAPE CHARACTER.");

+	//Now we attempt to build our string

+	{if(i <= lenin) c = inbuf[i++]; else {goto end;}};

+	long long doescape = 0; //set after an escape character so the next character is copied literally

+	while(i<=lenin){

+		if(!doescape && c==esc){

+			doescape=1;{if(i <= lenin) c = inbuf[i++]; else {goto end;}};continue;

+		}

+		if(!doescape && c==tokmark){

+			//Handle digits prefixing a token.

+			unsigned long long l = 0;

+			{if(i <= lenin) c = inbuf[i++]; else {goto end;}};

+			if(isdigit(c))

+				while(isdigit(c)){

+					c_str[0] = c;

+					l *= 10;

+					l += atoi(c_str);

+					{if(i <= lenin) c = inbuf[i++]; else {goto end;}};

+				}

+			else {l=1;}

+			i--; //step back so that inbuf+i points at the first character of the token name

+			//The first table entry whose name is a prefix of the remaining input wins.

+			for(unsigned long long t = 0; t < ntokens; t++)

+				if(strprefix(tokens[t*2], inbuf+i)){

+					//MATCH!

+					for(unsigned long long q = 0; q < l; q++)

+						out = strcatallocf1(out, tokens[t*2+1]);

+					i+=strlen(tokens[t*2]);

+					break; //break out of the for.

+				}

+			if(i<=lenin) {if(i <= lenin) c = inbuf[i++]; else {goto end;}};

+			continue;

+		}else{

+			//Ordinary (or escaped) character: copy it through unchanged.

+			c_str[0] = c;

+			out = strcatallocf1(out, c_str);

+			doescape = 0;

+			{if(i <= lenin) c = inbuf[i++]; else {goto end;}};

+		}

+	}

+	//Common exit: release the token table and hand back whatever was decoded.

+	end:

+	if(tokens){

+		for(unsigned long long j = 0; j < ntokens; j++)

+			{STRUTIL_FREE(tokens[j*2]);STRUTIL_FREE(tokens[j*2+1]);}

+		STRUTIL_FREE(tokens);

+	}

+	//if(toklens)STRUTIL_FREE(toklens);

+	return out;

+}

--- /dev/null

+++ b/include-demo/tobjparse.h

@@ -1,0 +1,364 @@

+/* Public Domain / CC0 3d OBJ Parser

+With support for Per Vertex Color (VC) Lines.

+Written by Gek (DMHSW) in 2020

+*/

+#ifndef TOBJ_PARSE_H

+#define TOBJ_PARSE_H

+#include "3dMath.h"

+#include <stdio.h>

+#include <string.h>

+#include <stdlib.h>

+#include <ctype.h>

+//One triangle corner. Each field is an index into the matching objraw array;

+//tobj_tomodel subtracts one from each before using it, so they count from 1.

+typedef struct{

+	unsigned long long p;  //position

+	unsigned long long n;  //normal

+	unsigned long long tc; //texture coordinate

+	unsigned long long vc; //vertex color

+}facedef;

+//Contents of an OBJ file as tobj_load collects them: one heap array per

+//attribute kind plus the list of face corners, each with its own element count.

+typedef struct{

+	unsigned int npos;       //entries in positions

+	unsigned int nnorm;      //entries in normals

+	unsigned int ntexcoords; //entries in texcoords

+	unsigned int ncolors;    //entries in colors

+	unsigned int nfaces;     //entries in faces (tobj_load adds three per "f" line)

+	vec3* positions;

+	vec3* normals;

+	vec3* texcoords;

+	vec3* colors;

+	facedef* faces;

+}objraw;

+typedef struct{ //Flattened per-corner geometry filled in by tobj_tomodel; n, t and c stay NULL when the source has no such data.

+	int npoints; //Number of points stored in d.

+	vec3* d; //Triangles (same winding as in the file)

+	vec3* n; //Normals

+	vec3* t; //Texture coordinates

+	vec3* c; //Colors

+}model;

+//Builds an objraw that holds nothing: every count is zero and every array

+//pointer, colors included, is NULL, so freeobjraw may be called on it safely.

+objraw initobjraw(void){

+	objraw blank = {

+		.npos=0,

+		.nnorm=0,

+		.ntexcoords=0,

+		.ncolors=0,

+		.nfaces=0,

+		.positions=NULL,

+		.normals=NULL,

+		.texcoords=NULL,

+		.colors=NULL,

+		.faces=NULL

+	};

+	return blank;

+}

+//Builds a model that holds nothing: zero points and all four arrays NULL.

+//Declared with (void) so the compiler rejects calls that pass arguments.

+model initmodel(void){

+	model blank = {

+		.npoints=0,

+		.d=NULL,

+		.n=NULL,

+		.t=NULL,

+		.c=NULL

+	};

+	return blank;

+}

+//Releases every array owned by *o and puts it back into the empty state

+//(NULL pointers, zero counts), so the struct can be reused or passed here

+//again without a double free. Passing NULL does nothing.

+//The objraw itself is not freed.

+void freeobjraw(objraw* o){

+	if(!o)return;

+	free(o->positions);

+	free(o->texcoords);

+	free(o->normals);

+	free(o->colors);

+	free(o->faces);

+	o->positions = NULL;

+	o->texcoords = NULL;

+	o->normals = NULL;

+	o->colors = NULL;

+	o->faces = NULL;

+	//Counts must follow the arrays, otherwise tobj_tomodel would trust them.

+	o->npos = 0;

+	o->ntexcoords = 0;

+	o->nnorm = 0;

+	o->ncolors = 0;

+	o->nfaces = 0;

+}

+//Releases the four arrays owned by *o and resets it to the empty state

+//(NULL pointers, zero points), so a repeated call cannot double free.

+//Passing NULL does nothing. The model struct itself is not freed.

+void freemodel(model* o){

+	if(!o)return;

+	free(o->d);

+	free(o->t);

+	free(o->n);

+	free(o->c);

+	o->d = NULL;

+	o->t = NULL;

+	o->n = NULL;

+	o->c = NULL;

+	o->npoints = 0;

+}

+//Allocates count vec3 slots for tobj_tomodel. Running out of memory is

+//treated as fatal: a message is printed and the program exits.

+static vec3* tobj_alloc_vec3s(unsigned int count){

+	vec3* slots = malloc(sizeof(vec3) * (size_t)count);

+	if(!slots){

+		printf("\nOut of memory while converting model\n");

+		exit(1);

+	}

+	return slots;

+}

+//Expands the indexed data in raw into flat per-corner arrays.

+//Every entry of raw->faces is looked up (indices count from 1) and the

+//referenced position is appended to ret.d; normals, texture coordinates and

+//colors are appended the same way, but only when raw carries that attribute.

+//An out-of-range index is reported on stdout and that entry is skipped.

+//Returns an empty model when raw is NULL or has no faces. The caller owns

+//the returned arrays and releases them with freemodel.

+model tobj_tomodel(objraw* raw){

+	if(!raw || raw->faces == NULL || raw->nfaces == 0)

+	{

+		puts("\nAttempted to convert empty model... Aborting...\n");

+		return initmodel();

+	}

+	model ret = initmodel();

+	//One slot per face corner is the upper bound for every output array.

+	ret.d = tobj_alloc_vec3s(raw->nfaces);

+	if(raw->normals)ret.n = tobj_alloc_vec3s(raw->nfaces);

+	if(raw->texcoords)ret.t = tobj_alloc_vec3s(raw->nfaces);

+	if(raw->colors)ret.c = tobj_alloc_vec3s(raw->nfaces);

+	long long unsigned int piter = 0;

+	long long unsigned int niter = 0;

+	long long unsigned int titer = 0;

+	long long unsigned int citer = 0;

+	for(long long int i = 0; i < raw->nfaces;i++){

+		const facedef corner = raw->faces[i];

+		//Convert to 0-based. An index of 0 wraps to the largest value and

+		//therefore fails the range checks below.

+		long long unsigned int p = corner.p-1;

+		long long unsigned int n = corner.n-1;

+		long long unsigned int t = corner.tc-1;

+		long long unsigned int c = corner.vc-1;

+		if(p < (long long unsigned int)raw->npos){

+			ret.d[piter++] = raw->positions[p];

+			ret.npoints++;

+		} else {

+			printf("\nBad Data!!!\n");

+			printf("p=%llu n=%llu t=%llu c=%llu i=%lld\n\n",p,n,t,c,i);

+		}

+		if(raw->normals){

+			if(n < (long long unsigned int)raw->nnorm){

+				ret.n[niter++] = raw->normals[n];

+			} else {

+				puts("\n<BAD DATA>, NORMALS\n");

+			}

+		}

+		if(raw->texcoords){

+			if(t < (long long unsigned int)raw->ntexcoords){

+				ret.t[titer++] = raw->texcoords[t];

+			} else {

+				puts("\n<BAD DATA>, TEXCOORDS\n");

+			}

+		}

+		if(raw->colors){

+			if(c < (long long unsigned int)raw->ncolors){

+				ret.c[citer++] = raw->colors[c];

+			} else {

+				puts("\n<BAD DATA>, COLORS\n");

+			}

+		}

+	}

+	//NOTE(review): npoints and piter are advanced together above, so this

+	//guard looks unreachable; skipped corners are only reported, not fatal.

+	if((long long unsigned int)ret.npoints != piter){

+		printf("\nBAD DATA!!! ABORTING...\n");

+		exit(1);

+	}

+	return ret;

+}

+//Grows a heap array so it can hold n+1 elements of elemsize bytes and returns

+//the (possibly moved) array. Running out of memory is treated as fatal: a

+//message is printed and the program exits, so the result is never NULL.

+static void* tobj_grow(void* vec, size_t elemsize, unsigned int n){

+	void* bigger = realloc(vec, elemsize * ((size_t)n + 1));

+	if(!bigger){

+		printf("\nOut of memory while loading model\n");

+		exit(1);

+	}

+	return bigger;

+}

+//Appends val to the heap array vec holding n elements of the given type; n is incremented.

+#define TOBJ_PUSH(type, vec, n, val){(vec) = tobj_grow((vec), sizeof(type), (n)); (vec)[(n)++] = (val);}

+//Moves past the current whitespace-delimited token and the single separator

+//after it. Returns NULL when the line ends before another token can start.

+static char* tobj_next_token(char* t){

+	while(*t != '\0' && !isspace((unsigned char)*t))t++;

+	if(*t == '\0')return NULL;

+	return t+1;

+}

+//Moves just past the next '/' or whitespace character. Stops on the

+//terminator instead of stepping over it, so the scan never leaves the line.

+static char* tobj_after_slash(char* t){

+	while(*t != '\0' && *t != '/' && !isspace((unsigned char)*t))t++;

+	if(*t != '\0')t++;

+	return t;

+}

+//Reads up to count (at most 3) numbers separated by single whitespace

+//characters, starting at t. Components the line does not supply stay zero.

+static vec3 tobj_read_floats(char* t, int count){

+	vec3 v = {{0.0f, 0.0f, 0.0f}};

+	for(int k = 0; k < count && t; k++){

+		v.d[k] = atof(t);

+		t = tobj_next_token(t);

+	}

+	return v;

+}

+//Reads one index from each of the three corners of an "f" line.

+//field selects the slash-separated slot inside a corner: 0 = position,

+//1 = texture coordinate, 2 = normal, 3 = color. A slot with no digits reads as 0.

+//Returns 0 when the line ends before the third corner is reached.

+static int tobj_face_field(char* line, int field, long long unsigned int out[3]){

+	char* t = line+2;

+	for(int corner = 0; corner < 3; corner++){

+		for(int s = 0; s < field; s++)t = tobj_after_slash(t);

+		out[corner] = strtoull(t,NULL,10);

+		if(corner == 2)break;

+		t = tobj_next_token(t);

+		if(!t)return 0;

+	}

+	return 1;

+}

+//Loads the OBJ file at fn into an objraw; it only loads, conversion to

+//drawable arrays is done by tobj_tomodel.

+//Recognized lines: "v x y z", "vt u v" (v is stored negated), "vc r g b",

+//"vn x y z" and triangular "f" lines whose corners look like p, p/vt, p//n,

+//p/vt/n or p/vt/n/c. Lines of four characters or fewer are ignored.

+//A face slot is only read when the matching array already has entries, so

+//attribute lines have to appear before the faces that use them; slots that

+//are not read are stored as 0. An "f" line with fewer than three corners is

+//dropped.

+//If the file cannot be opened a message is printed and an empty objraw is

+//returned. The caller releases the result with freeobjraw.

+objraw tobj_load(const char* fn){

+	objraw retval = initobjraw();

+	FILE* f = fopen(fn, "r");

+	if(!f){

+		printf("\nUnable to load file %s\n",fn);

+		return retval;

+	}

+	char line[2048];line[2047]=0;

+	while(fgets(line, 2047, f)){

+		if(strlen(line) <= 4)continue;

+		if(line[0] == 'v'){

+			if(line[1] == ' '){

+				vec3 v = tobj_read_floats(line+2, 3);

+				TOBJ_PUSH(vec3, retval.positions, retval.npos, v);

+			} else if(line[1] == 't'){

+				vec3 v = tobj_read_floats(line+3, 2);

+				v.d[1] = -v.d[1]; //the second texture coordinate is stored negated

+				TOBJ_PUSH(vec3, retval.texcoords, retval.ntexcoords, v);

+			} else if(line[1] == 'c'){

+				vec3 v = tobj_read_floats(line+3, 3);

+				TOBJ_PUSH(vec3, retval.colors, retval.ncolors, v);

+			} else if(line[1] == 'n'){

+				vec3 v = tobj_read_floats(line+3, 3);

+				TOBJ_PUSH(vec3, retval.normals, retval.nnorm, v);

+			}

+		} else if(line[0] == 'f'){

+			long long unsigned int p[3] = {0,0,0};

+			long long unsigned int tc[3] = {0,0,0};

+			long long unsigned int n[3] = {0,0,0};

+			long long unsigned int vc[3] = {0,0,0};

+			//Any slot that runs out of corners drops the whole face.

+			if(!tobj_face_field(line, 0, p))continue;

+			if(retval.texcoords && !tobj_face_field(line, 1, tc))continue;

+			if(retval.normals && !tobj_face_field(line, 2, n))continue;

+			if(retval.colors && !tobj_face_field(line, 3, vc))continue;

+			for(int k = 0; k < 3; k++){

+				facedef corner;

+				corner.p = p[k];

+				corner.n = n[k];

+				corner.tc = tc[k];

+				corner.vc = vc[k];

+				TOBJ_PUSH(facedef, retval.faces, retval.nfaces, corner);

+			}

+		}

+	}

+	fclose(f);

+	return retval;

+}

+#endif

--

⑨