shithub: tinygl

Download patch

ref: b833e3a78434e4956e434a9e86781b879c2d6f96
parent: da02e2032846813bffaff2c951625c62c27a48f4
author: gek <6974902+gordonfreeman424@users.noreply.github.com>
date: Fri Sep 3 18:45:43 EDT 2021

gitap from gek@Katherine.

--- /dev/null
+++ b/include-demo/stb_c_lexer.h
@@ -1,0 +1,940 @@
+// stb_c_lexer.h - v0.12 - public domain Sean Barrett 2013
+// lexer for making little C-like languages with recursive-descent parsers
+//
+// This file provides both the interface and the implementation.
+// To instantiate the implementation,
+//      #define STB_C_LEXER_IMPLEMENTATION
+// in *ONE* source file, before #including this file.
+//
+// The default configuration is fairly close to a C lexer, although
+// suffixes on integer constants are not handled (you can override this).
+//
+// History:
+//     0.12 fix compilation bug for NUL support; better support separate inclusion
+//     0.11 fix clang static analysis warning
+//     0.10 fix warnings
+//     0.09 hex floats, no-stdlib fixes
+//     0.08 fix bad pointer comparison
+//     0.07 fix mishandling of hexadecimal constants parsed by strtol
+//     0.06 fix missing next character after ending quote mark (Andreas Fredriksson)
+//     0.05 refixed get_location because github version had lost the fix
+//     0.04 fix octal parsing bug
+//     0.03 added STB_C_LEX_DISCARD_PREPROCESSOR option
+//          refactor API to simplify (only one struct instead of two)
+//          change literal enum names to have 'lit' at the end
+//     0.02 first public release
+//
+// Status:
+//     - haven't tested compiling as C++
+//     - haven't tested the float parsing path
+//     - haven't tested the non-default-config paths (e.g. non-stdlib)
+//     - only tested default-config paths by eyeballing output of self-parse
+//
+//     - haven't implemented multiline strings
+//     - haven't implemented octal/hex character constants
+//     - haven't implemented support for unicode CLEX_char
+//     - need to expand error reporting so you don't just get "CLEX_parse_error"
+//
+// Contributors:
+//   Arpad Goretity (bugfix)
+//   Alan Hickman (hex floats)
+//
+// LICENSE
+//
+//   See end of file for license information.
+
+#ifdef STB_C_LEXER_IMPLEMENTATION
+#ifndef STB_C_LEXER_DEFINITIONS
+// to change the default parsing rules, copy the following lines
+// into your C/C++ file *before* including this, and then replace
+// the Y's with N's for the ones you don't want. This needs to be
+// set to the same values for every place in your program where
+// stb_c_lexer.h is included.
+// --BEGIN--
+
+#if defined(Y) || defined(N)
+#error "Can only use stb_c_lexer in contexts where the preprocessor symbols 'Y' and 'N' are not defined"
+#endif
+
+#define STB_C_LEX_C_DECIMAL_INTS    Y   //  "0|[1-9][0-9]*"                        CLEX_intlit
+#define STB_C_LEX_C_HEX_INTS        Y   //  "0x[0-9a-fA-F]+"                       CLEX_intlit
+#define STB_C_LEX_C_OCTAL_INTS      Y   //  "[0-7]+"                               CLEX_intlit
+#define STB_C_LEX_C_DECIMAL_FLOATS  Y   //  "[0-9]*(.[0-9]*([eE][-+]?[0-9]+)?)     CLEX_floatlit
+#define STB_C_LEX_C99_HEX_FLOATS    N   //  "0x{hex}+(.{hex}*)?[pP][-+]?{hex}+     CLEX_floatlit
+#define STB_C_LEX_C_IDENTIFIERS     Y   //  "[_a-zA-Z][_a-zA-Z0-9]*"               CLEX_id
+#define STB_C_LEX_C_DQ_STRINGS      Y   //  double-quote-delimited strings with escapes  CLEX_dqstring
+#define STB_C_LEX_C_SQ_STRINGS      N   //  single-quote-delimited strings with escapes  CLEX_ssstring
+#define STB_C_LEX_C_CHARS           Y   //  single-quote-delimited character with escape CLEX_charlits
+#define STB_C_LEX_C_COMMENTS        Y   //  "/* comment */"
+#define STB_C_LEX_CPP_COMMENTS      Y   //  "// comment to end of line\n"
+#define STB_C_LEX_C_COMPARISONS     Y   //  "==" CLEX_eq  "!=" CLEX_noteq   "<=" CLEX_lesseq  ">=" CLEX_greatereq
+#define STB_C_LEX_C_LOGICAL         Y   //  "&&"  CLEX_andand   "||"  CLEX_oror
+#define STB_C_LEX_C_SHIFTS          Y   //  "<<"  CLEX_shl      ">>"  CLEX_shr
+#define STB_C_LEX_C_INCREMENTS      Y   //  "++"  CLEX_plusplus "--"  CLEX_minusminus
+#define STB_C_LEX_C_ARROW           Y   //  "->"  CLEX_arrow
+#define STB_C_LEX_EQUAL_ARROW       N   //  "=>"  CLEX_eqarrow
+#define STB_C_LEX_C_BITWISEEQ       Y   //  "&="  CLEX_andeq    "|="  CLEX_oreq     "^="  CLEX_xoreq
+#define STB_C_LEX_C_ARITHEQ         Y   //  "+="  CLEX_pluseq   "-="  CLEX_minuseq
+                                        //  "*="  CLEX_muleq    "/="  CLEX_diveq    "%=" CLEX_modeq
+                                        //  if both STB_C_LEX_SHIFTS & STB_C_LEX_ARITHEQ:
+                                        //                      "<<=" CLEX_shleq    ">>=" CLEX_shreq
+
+#define STB_C_LEX_PARSE_SUFFIXES    N   // letters after numbers are parsed as part of those numbers, and must be in suffix list below
+#define STB_C_LEX_DECIMAL_SUFFIXES  ""  // decimal integer suffixes e.g. "uUlL" -- these are returned as-is in string storage
+#define STB_C_LEX_HEX_SUFFIXES      ""  // e.g. "uUlL"
+#define STB_C_LEX_OCTAL_SUFFIXES    ""  // e.g. "uUlL"
+#define STB_C_LEX_FLOAT_SUFFIXES    ""  //
+
+#define STB_C_LEX_0_IS_EOF             N  // if Y, ends parsing at '\0'; if N, returns '\0' as token
+#define STB_C_LEX_INTEGERS_AS_DOUBLES  N  // parses integers as doubles so they can be larger than 'int', but only if STB_C_LEX_STDLIB==N
+#define STB_C_LEX_MULTILINE_DSTRINGS   N  // allow newlines in double-quoted strings
+#define STB_C_LEX_MULTILINE_SSTRINGS   N  // allow newlines in single-quoted strings
+#define STB_C_LEX_USE_STDLIB           Y  // use strtod,strtol for parsing #s; otherwise inaccurate hack
+#define STB_C_LEX_DOLLAR_IDENTIFIER    Y  // allow $ as an identifier character
+#define STB_C_LEX_FLOAT_NO_DECIMAL     Y  // allow floats that have no decimal point if they have an exponent
+
+#define STB_C_LEX_DEFINE_ALL_TOKEN_NAMES  N   // if Y, all CLEX_ token names are defined, even if never returned
+                                              // leaving it as N should help you catch config bugs
+
+#define STB_C_LEX_DISCARD_PREPROCESSOR    Y   // discard C-preprocessor directives (e.g. after prepocess
+                                              // still have #line, #pragma, etc)
+
+//#define STB_C_LEX_ISWHITE(str)    ... // return length in bytes of whitespace characters if first char is whitespace
+
+#define STB_C_LEXER_DEFINITIONS         // This line prevents the header file from replacing your definitions
+// --END--
+#endif
+#endif
+
+#ifndef INCLUDE_STB_C_LEXER_H
+#define INCLUDE_STB_C_LEXER_H
+
+typedef struct
+{
+   // lexer variables
+   char *input_stream;
+   char *eof;
+   char *parse_point;
+   char *string_storage;
+   int   string_storage_len;
+
+   // lexer parse location for error messages
+   char *where_firstchar;
+   char *where_lastchar;
+
+   // lexer token variables
+   long token;
+   double real_number;
+   long   int_number;
+   char *string;
+   int string_len;
+} stb_lexer;
+
+typedef struct
+{
+   int line_number;
+   int line_offset;
+} stb_lex_location;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern void stb_c_lexer_init(stb_lexer *lexer, const char *input_stream, const char *input_stream_end, char *string_store, int store_length);
+// this function initialize the 'lexer' structure
+//   Input:
+//   - input_stream points to the file to parse, loaded into memory
+//   - input_stream_end points to the end of the file, or NULL if you use 0-for-EOF
+//   - string_store is storage the lexer can use for storing parsed strings and identifiers
+//   - store_length is the length of that storage
+
+extern int stb_c_lexer_get_token(stb_lexer *lexer);
+// this function returns non-zero if a token is parsed, or 0 if at EOF
+//   Output:
+//   - lexer->token is the token ID, which is unicode code point for a single-char token, < 0 for a multichar or eof or error
+//   - lexer->real_number is a double constant value for CLEX_floatlit, or CLEX_intlit if STB_C_LEX_INTEGERS_AS_DOUBLES
+//   - lexer->int_number is an integer constant for CLEX_intlit if !STB_C_LEX_INTEGERS_AS_DOUBLES, or character for CLEX_charlit
+//   - lexer->string is a 0-terminated string for CLEX_dqstring or CLEX_sqstring or CLEX_identifier
+//   - lexer->string_len is the byte length of lexer->string
+
+extern void stb_c_lexer_get_location(const stb_lexer *lexer, const char *where, stb_lex_location *loc);
+// this inefficient function returns the line number and character offset of a
+// given location in the file as returned by stb_lex_token. Because it's inefficient,
+// you should only call it for errors, not for every token.
+// For error messages of invalid tokens, you typically want the location of the start
+// of the token (which caused the token to be invalid). For bugs involving legit
+// tokens, you can report the first or the range.
+//    Output:
+//    - loc->line_number is the line number in the file, counting from 1, of the location
+//    - loc->line_offset is the char-offset in the line, counting from 0, of the location
+
+
+#ifdef __cplusplus
+}
+#endif
+
+enum
+{
+   CLEX_eof = 256,
+   CLEX_parse_error,
+   CLEX_intlit        ,
+   CLEX_floatlit      ,
+   CLEX_id            ,
+   CLEX_dqstring      ,
+   CLEX_sqstring      ,
+   CLEX_charlit       ,
+   CLEX_eq            ,
+   CLEX_noteq         ,
+   CLEX_lesseq        ,
+   CLEX_greatereq     ,
+   CLEX_andand        ,
+   CLEX_oror          ,
+   CLEX_shl           ,
+   CLEX_shr           ,
+   CLEX_plusplus      ,
+   CLEX_minusminus    ,
+   CLEX_pluseq        ,
+   CLEX_minuseq       ,
+   CLEX_muleq         ,
+   CLEX_diveq         ,
+   CLEX_modeq         ,
+   CLEX_andeq         ,
+   CLEX_oreq          ,
+   CLEX_xoreq         ,
+   CLEX_arrow         ,
+   CLEX_eqarrow       ,
+   CLEX_shleq, CLEX_shreq,
+
+   CLEX_first_unused_token
+
+};
+#endif // INCLUDE_STB_C_LEXER_H
+
+#ifdef STB_C_LEXER_IMPLEMENTATION
+
+// Hacky definitions so we can easily #if on them
+#define Y(x) 1
+#define N(x) 0
+
+#if STB_C_LEX_INTEGERS_AS_DOUBLES(x)
+typedef double     stb__clex_int;
+#define intfield   real_number
+#define STB__clex_int_as_double
+#else
+typedef long       stb__clex_int;
+#define intfield   int_number
+#endif
+
+// Convert these config options to simple conditional #defines so we can more
+// easily test them once we've change the meaning of Y/N
+
+#if STB_C_LEX_PARSE_SUFFIXES(x)
+#define STB__clex_parse_suffixes
+#endif
+
+#if STB_C_LEX_C99_HEX_FLOATS(x)
+#define STB__clex_hex_floats
+#endif
+
+#if STB_C_LEX_C_HEX_INTS(x)
+#define STB__clex_hex_ints
+#endif
+
+#if STB_C_LEX_C_DECIMAL_INTS(x)
+#define STB__clex_decimal_ints
+#endif
+
+#if STB_C_LEX_C_OCTAL_INTS(x)
+#define STB__clex_octal_ints
+#endif
+
+#if STB_C_LEX_C_DECIMAL_FLOATS(x)
+#define STB__clex_decimal_floats
+#endif
+
+#if STB_C_LEX_DISCARD_PREPROCESSOR(x)
+#define STB__clex_discard_preprocessor
+#endif
+
+#if STB_C_LEX_USE_STDLIB(x) && (!defined(STB__clex_hex_floats) || __STDC_VERSION__ >= 199901L)
+#define STB__CLEX_use_stdlib
+#include <stdlib.h>
+#endif
+
+// Now for the rest of the file we'll use the basic definition where
+// where Y expands to its contents and N expands to nothing
+#undef  Y
+#define Y(a) a
+#undef N
+#define N(a)
+
+// API function
+void stb_c_lexer_init(stb_lexer *lexer, const char *input_stream, const char *input_stream_end, char *string_store, int store_length)
+{
+   lexer->input_stream = (char *) input_stream;
+   lexer->eof = (char *) input_stream_end;
+   lexer->parse_point = (char *) input_stream;
+   lexer->string_storage = string_store;
+   lexer->string_storage_len = store_length;
+}
+
+// API function
+void stb_c_lexer_get_location(const stb_lexer *lexer, const char *where, stb_lex_location *loc)
+{
+   char *p = lexer->input_stream;
+   int line_number = 1;
+   int char_offset = 0;
+   while (*p && p < where) {
+      if (*p == '\n' || *p == '\r') {
+         p += (p[0]+p[1] == '\r'+'\n' ? 2 : 1); // skip newline
+         line_number += 1;
+         char_offset = 0;
+      } else {
+         ++p;
+         ++char_offset;
+      }
+   }
+   loc->line_number = line_number;
+   loc->line_offset = char_offset;
+}
+
+// main helper function for returning a parsed token
+static int stb__clex_token(stb_lexer *lexer, int token, char *start, char *end)
+{
+   lexer->token = token;
+   lexer->where_firstchar = start;
+   lexer->where_lastchar = end;
+   lexer->parse_point = end+1;
+   return 1;
+}
+
+// helper function for returning eof
+static int stb__clex_eof(stb_lexer *lexer)
+{
+   lexer->token = CLEX_eof;
+   return 0;
+}
+
+static int stb__clex_iswhite(int x)
+{
+   return x == ' ' || x == '\t' || x == '\r' || x == '\n' || x == '\f';
+}
+
+static const char *stb__strchr(const char *str, int ch)
+{
+   for (; *str; ++str)
+      if (*str == ch)
+         return str;
+   return 0;
+}
+
+// parse suffixes at the end of a number
+static int stb__clex_parse_suffixes(stb_lexer *lexer, long tokenid, char *start, char *cur, const char *suffixes)
+{
+   #ifdef STB__clex_parse_suffixes
+   lexer->string = lexer->string_storage;
+   lexer->string_len = 0;
+
+   while ((*cur >= 'a' && *cur <= 'z') || (*cur >= 'A' && *cur <= 'Z')) {
+      if (stb__strchr(suffixes, *cur) == 0)
+         return stb__clex_token(lexer, CLEX_parse_error, start, cur);
+      if (lexer->string_len+1 >= lexer->string_storage_len)
+         return stb__clex_token(lexer, CLEX_parse_error, start, cur);
+      lexer->string[lexer->string_len++] = *cur++;
+   }
+   #else
+   suffixes = suffixes; // attempt to suppress warnings
+   #endif
+   return stb__clex_token(lexer, tokenid, start, cur-1);
+}
+
+#ifndef STB__CLEX_use_stdlib
+static double stb__clex_pow(double base, unsigned int exponent)
+{
+   double value=1;
+   for ( ; exponent; exponent >>= 1) {
+      if (exponent & 1)
+         value *= base;
+      base *= base;
+   }
+   return value;
+}
+
+static double stb__clex_parse_float(char *p, char **q)
+{
+   char *s = p;
+   double value=0;
+   int base=10;
+   int exponent=0;
+
+#ifdef STB__clex_hex_floats
+   if (*p == '0') {
+      if (p[1] == 'x' || p[1] == 'X') {
+         base=16;
+         p += 2;
+      }
+   }
+#endif
+
+   for (;;) {
+      if (*p >= '0' && *p <= '9')
+         value = value*base + (*p++ - '0');
+#ifdef STB__clex_hex_floats
+      else if (base == 16 && *p >= 'a' && *p <= 'f')
+         value = value*base + 10 + (*p++ - 'a');
+      else if (base == 16 && *p >= 'A' && *p <= 'F')
+         value = value*base + 10 + (*p++ - 'A');
+#endif
+      else
+         break;
+   }
+
+   if (*p == '.') {
+      double pow, addend = 0;
+      ++p;
+      for (pow=1; ; pow*=base) {
+         if (*p >= '0' && *p <= '9')
+            addend = addend*base + (*p++ - '0');
+#ifdef STB__clex_hex_floats
+         else if (base == 16 && *p >= 'a' && *p <= 'f')
+            addend = addend*base + 10 + (*p++ - 'a');
+         else if (base == 16 && *p >= 'A' && *p <= 'F')
+            addend = addend*base + 10 + (*p++ - 'A');
+#endif
+         else
+            break;
+      }
+      value += addend / pow;
+   }
+#ifdef STB__clex_hex_floats
+   if (base == 16) {
+      // exponent required for hex float literal
+      if (*p != 'p' && *p != 'P') {
+         *q = s;
+         return 0;
+      }
+      exponent = 1;
+   } else
+#endif
+      exponent = (*p == 'e' || *p == 'E');
+
+   if (exponent) {
+      int sign = p[1] == '-';
+      unsigned int exponent=0;
+      double power=1;
+      ++p;
+      if (*p == '-' || *p == '+')
+         ++p;
+      while (*p >= '0' && *p <= '9')
+         exponent = exponent*10 + (*p++ - '0');
+
+#ifdef STB__clex_hex_floats
+      if (base == 16)
+         power = stb__clex_pow(2, exponent);
+      else
+#endif
+         power = stb__clex_pow(10, exponent);
+      if (sign)
+         value /= power;
+      else
+         value *= power;
+   }
+   *q = p;
+   return value;
+}
+#endif
+
+static int stb__clex_parse_char(char *p, char **q)
+{
+   if (*p == '\\') {
+      *q = p+2; // tentatively guess we'll parse two characters
+      switch(p[1]) {
+         case '\\': return '\\';
+         case '\'': return '\'';
+         case '"': return '"';
+         case 't': return '\t';
+         case 'f': return '\f';
+         case 'n': return '\n';
+         case 'r': return '\r';
+         case '0': return '\0'; // @TODO ocatal constants
+         case 'x': case 'X': return -1; // @TODO hex constants
+         case 'u': return -1; // @TODO unicode constants
+      }
+   }
+   *q = p+1;
+   return (unsigned char) *p;
+}
+
+static int stb__clex_parse_string(stb_lexer *lexer, char *p, int type)
+{
+   char *start = p;
+   char delim = *p++; // grab the " or ' for later matching
+   char *out = lexer->string_storage;
+   char *outend = lexer->string_storage + lexer->string_storage_len;
+   while (*p != delim) {
+      int n;
+      if (*p == '\\') {
+         char *q;
+         n = stb__clex_parse_char(p, &q);
+         if (n < 0)
+            return stb__clex_token(lexer, CLEX_parse_error, start, q);
+         p = q;
+      } else {
+         // @OPTIMIZE: could speed this up by looping-while-not-backslash
+         n = (unsigned char) *p++;
+      }
+      if (out+1 > outend)
+         return stb__clex_token(lexer, CLEX_parse_error, start, p);
+      // @TODO expand unicode escapes to UTF8
+      *out++ = (char) n;
+   }
+   *out = 0;
+   lexer->string = lexer->string_storage;
+   lexer->string_len = (int) (out - lexer->string_storage);
+   return stb__clex_token(lexer, type, start, p);
+}
+
+int stb_c_lexer_get_token(stb_lexer *lexer)
+{
+   char *p = lexer->parse_point;
+
+   // skip whitespace and comments
+   for (;;) {
+      #ifdef STB_C_LEX_ISWHITE
+      while (p != lexer->stream_end) {
+         int n;
+         n = STB_C_LEX_ISWHITE(p);
+         if (n == 0) break;
+         if (lexer->eof && lexer->eof - lexer->parse_point < n)
+            return stb__clex_token(tok, CLEX_parse_error, p,lexer->eof-1);
+         p += n;
+      }
+      #else
+      while (p != lexer->eof && stb__clex_iswhite(*p))
+         ++p;
+      #endif
+
+      STB_C_LEX_CPP_COMMENTS(
+         if (p != lexer->eof && p[0] == '/' && p[1] == '/') {
+            while (p != lexer->eof && *p != '\r' && *p != '\n')
+               ++p;
+            continue;
+         }
+      )
+
+      STB_C_LEX_C_COMMENTS(
+         if (p != lexer->eof && p[0] == '/' && p[1] == '*') {
+            char *start = p;
+            p += 2;
+            while (p != lexer->eof && (p[0] != '*' || p[1] != '/'))
+               ++p;
+            if (p == lexer->eof)
+               return stb__clex_token(lexer, CLEX_parse_error, start, p-1);
+            p += 2;
+            continue;
+         }
+      )
+
+      #ifdef STB__clex_discard_preprocessor
+         // @TODO this discards everything after a '#', regardless
+         // of where in the line the # is, rather than requiring it
+         // be at the start. (because this parser doesn't otherwise
+         // check for line breaks!)
+         if (p != lexer->eof && p[0] == '#') {
+            while (p != lexer->eof && *p != '\r' && *p != '\n')
+               ++p;
+            continue;
+         }
+      #endif
+
+      break;
+   }
+
+   if (p == lexer->eof)
+      return stb__clex_eof(lexer);
+
+   switch (*p) {
+      default:
+         if (   (*p >= 'a' && *p <= 'z')
+             || (*p >= 'A' && *p <= 'Z')
+             || *p == '_' || (unsigned char) *p >= 128    // >= 128 is UTF8 char
+             STB_C_LEX_DOLLAR_IDENTIFIER( || *p == '$' ) )
+         {
+            int n = 0;
+            lexer->string = lexer->string_storage;
+            lexer->string_len = n;
+            do {
+               if (n+1 >= lexer->string_storage_len)
+                  return stb__clex_token(lexer, CLEX_parse_error, p, p+n);
+               lexer->string[n] = p[n];
+               ++n;
+            } while (
+                  (p[n] >= 'a' && p[n] <= 'z')
+               || (p[n] >= 'A' && p[n] <= 'Z')
+               || (p[n] >= '0' && p[n] <= '9') // allow digits in middle of identifier
+               || p[n] == '_' || (unsigned char) p[n] >= 128
+                STB_C_LEX_DOLLAR_IDENTIFIER( || p[n] == '$' )
+            );
+            lexer->string[n] = 0;
+            return stb__clex_token(lexer, CLEX_id, p, p+n-1);
+         }
+
+         // check for EOF
+         STB_C_LEX_0_IS_EOF(
+            if (*p == 0)
+               return stb__clex_eof(lexer);
+         )
+
+      single_char:
+         // not an identifier, return the character as itself
+         return stb__clex_token(lexer, *p, p, p);
+
+      case '+':
+         if (p+1 != lexer->eof) {
+            STB_C_LEX_C_INCREMENTS(if (p[1] == '+') return stb__clex_token(lexer, CLEX_plusplus, p,p+1);)
+            STB_C_LEX_C_ARITHEQ(   if (p[1] == '=') return stb__clex_token(lexer, CLEX_pluseq  , p,p+1);)
+         }
+         goto single_char;
+      case '-':
+         if (p+1 != lexer->eof) {
+            STB_C_LEX_C_INCREMENTS(if (p[1] == '-') return stb__clex_token(lexer, CLEX_minusminus, p,p+1);)
+            STB_C_LEX_C_ARITHEQ(   if (p[1] == '=') return stb__clex_token(lexer, CLEX_minuseq   , p,p+1);)
+            STB_C_LEX_C_ARROW(     if (p[1] == '>') return stb__clex_token(lexer, CLEX_arrow     , p,p+1);)
+         }
+         goto single_char;
+      case '&':
+         if (p+1 != lexer->eof) {
+            STB_C_LEX_C_LOGICAL(  if (p[1] == '&') return stb__clex_token(lexer, CLEX_andand, p,p+1);)
+            STB_C_LEX_C_BITWISEEQ(if (p[1] == '=') return stb__clex_token(lexer, CLEX_andeq , p,p+1);)
+         }
+         goto single_char;
+      case '|':
+         if (p+1 != lexer->eof) {
+            STB_C_LEX_C_LOGICAL(  if (p[1] == '|') return stb__clex_token(lexer, CLEX_oror, p,p+1);)
+            STB_C_LEX_C_BITWISEEQ(if (p[1] == '=') return stb__clex_token(lexer, CLEX_oreq, p,p+1);)
+         }
+         goto single_char;
+      case '=':
+         if (p+1 != lexer->eof) {
+            STB_C_LEX_C_COMPARISONS(if (p[1] == '=') return stb__clex_token(lexer, CLEX_eq, p,p+1);)
+            STB_C_LEX_EQUAL_ARROW(  if (p[1] == '>') return stb__clex_token(lexer, CLEX_eqarrow, p,p+1);)
+         }
+         goto single_char;
+      case '!':
+         STB_C_LEX_C_COMPARISONS(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_noteq, p,p+1);)
+         goto single_char;
+      case '^':
+         STB_C_LEX_C_BITWISEEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_xoreq, p,p+1));
+         goto single_char;
+      case '%':
+         STB_C_LEX_C_ARITHEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_modeq, p,p+1));
+         goto single_char;
+      case '*':
+         STB_C_LEX_C_ARITHEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_muleq, p,p+1));
+         goto single_char;
+      case '/':
+         STB_C_LEX_C_ARITHEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_diveq, p,p+1));
+         goto single_char;
+      case '<':
+         if (p+1 != lexer->eof) {
+            STB_C_LEX_C_COMPARISONS(if (p[1] == '=') return stb__clex_token(lexer, CLEX_lesseq, p,p+1);)
+            STB_C_LEX_C_SHIFTS(     if (p[1] == '<') {
+                                       STB_C_LEX_C_ARITHEQ(if (p+2 != lexer->eof && p[2] == '=')
+                                                              return stb__clex_token(lexer, CLEX_shleq, p,p+2);)
+                                       return stb__clex_token(lexer, CLEX_shl, p,p+1);
+                                    }
+                              )
+         }
+         goto single_char;
+      case '>':
+         if (p+1 != lexer->eof) {
+            STB_C_LEX_C_COMPARISONS(if (p[1] == '=') return stb__clex_token(lexer, CLEX_greatereq, p,p+1);)
+            STB_C_LEX_C_SHIFTS(     if (p[1] == '>') {
+                                       STB_C_LEX_C_ARITHEQ(if (p+2 != lexer->eof && p[2] == '=')
+                                                              return stb__clex_token(lexer, CLEX_shreq, p,p+2);)
+                                       return stb__clex_token(lexer, CLEX_shr, p,p+1);
+                                    }
+                              )
+         }
+         goto single_char;
+
+      case '"':
+         STB_C_LEX_C_DQ_STRINGS(return stb__clex_parse_string(lexer, p, CLEX_dqstring);)
+         goto single_char;
+      case '\'':
+         STB_C_LEX_C_SQ_STRINGS(return stb__clex_parse_string(lexer, p, CLEX_sqstring);)
+         STB_C_LEX_C_CHARS(
+         {
+            char *start = p;
+            lexer->int_number = stb__clex_parse_char(p+1, &p);
+            if (lexer->int_number < 0)
+               return stb__clex_token(lexer, CLEX_parse_error, start,start);
+            if (p == lexer->eof || *p != '\'')
+               return stb__clex_token(lexer, CLEX_parse_error, start,p);
+            return stb__clex_token(lexer, CLEX_charlit, start, p+1);
+         })
+         goto single_char;
+
+      case '0':
+         #if defined(STB__clex_hex_ints) || defined(STB__clex_hex_floats)
+            if (p+1 != lexer->eof) {
+               if (p[1] == 'x' || p[1] == 'X') {
+                  char *q;
+
+                  #ifdef STB__clex_hex_floats
+                  for (q=p+2;
+                       q != lexer->eof && ((*q >= '0' && *q <= '9') || (*q >= 'a' && *q <= 'f') || (*q >= 'A' && *q <= 'F'));
+                       ++q);
+                  if (q != lexer->eof) {
+                     if (*q == '.' STB_C_LEX_FLOAT_NO_DECIMAL(|| *q == 'p' || *q == 'P')) {
+                        #ifdef STB__CLEX_use_stdlib
+                        lexer->real_number = strtod((char *) p, (char**) &q);
+                        #else
+                        lexer->real_number = stb__clex_parse_float(p, &q);
+                        #endif
+
+                        if (p == q)
+                           return stb__clex_token(lexer, CLEX_parse_error, p,q);
+                        return stb__clex_parse_suffixes(lexer, CLEX_floatlit, p,q, STB_C_LEX_FLOAT_SUFFIXES);
+
+                     }
+                  }
+                  #endif   // STB__CLEX_hex_floats
+
+                  #ifdef STB__clex_hex_ints
+                  #ifdef STB__CLEX_use_stdlib
+                  lexer->int_number = strtol((char *) p, (char **) &q, 16);
+                  #else
+                  {
+                     stb__clex_int n=0;
+                     for (q=p+2; q != lexer->eof; ++q) {
+                        if (*q >= '0' && *q <= '9')
+                           n = n*16 + (*q - '0');
+                        else if (*q >= 'a' && *q <= 'f')
+                           n = n*16 + (*q - 'a') + 10;
+                        else if (*q >= 'A' && *q <= 'F')
+                           n = n*16 + (*q - 'A') + 10;
+                        else
+                           break;
+                     }
+                     lexer->int_number = n;
+                  }
+                  #endif
+                  if (q == p+2)
+                     return stb__clex_token(lexer, CLEX_parse_error, p-2,p-1);
+                  return stb__clex_parse_suffixes(lexer, CLEX_intlit, p,q, STB_C_LEX_HEX_SUFFIXES);
+                  #endif
+               }
+            }
+         #endif // defined(STB__clex_hex_ints) || defined(STB__clex_hex_floats)
+         // can't test for octal because we might parse '0.0' as float or as '0' '.' '0',
+         // so have to do float first
+
+         /* FALL THROUGH */
+      case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
+
+         #ifdef STB__clex_decimal_floats
+         {
+            char *q = p;
+            while (q != lexer->eof && (*q >= '0' && *q <= '9'))
+               ++q;
+            if (q != lexer->eof) {
+               if (*q == '.' STB_C_LEX_FLOAT_NO_DECIMAL(|| *q == 'e' || *q == 'E')) {
+                  #ifdef STB__CLEX_use_stdlib
+                  lexer->real_number = strtod((char *) p, (char**) &q);
+                  #else
+                  lexer->real_number = stb__clex_parse_float(p, &q);
+                  #endif
+
+                  return stb__clex_parse_suffixes(lexer, CLEX_floatlit, p,q, STB_C_LEX_FLOAT_SUFFIXES);
+
+               }
+            }
+         }
+         #endif // STB__clex_decimal_floats
+
+         #ifdef STB__clex_octal_ints
+         if (p[0] == '0') {
+            char *q = p;
+            #ifdef STB__CLEX_use_stdlib
+            lexer->int_number = strtol((char *) p, (char **) &q, 8);
+            #else
+            stb__clex_int n=0;
+            while (q != lexer->eof) {
+               if (*q >= '0' && *q <= '7')
+                  n = n*8 + (*q - '0');
+               else
+                  break;
+               ++q;
+            }
+            if (q != lexer->eof && (*q == '8' || *q=='9'))
+               return stb__clex_token(lexer, CLEX_parse_error, p, q);
+            lexer->int_number = n;
+            #endif
+            return stb__clex_parse_suffixes(lexer, CLEX_intlit, p,q, STB_C_LEX_OCTAL_SUFFIXES);
+         }
+         #endif // STB__clex_octal_ints
+
+         #ifdef STB__clex_decimal_ints
+         {
+            char *q = p;
+            #ifdef STB__CLEX_use_stdlib
+            lexer->int_number = strtol((char *) p, (char **) &q, 10);
+            #else
+            stb__clex_int n=0;
+            while (q != lexer->eof) {
+               if (*q >= '0' && *q <= '9')
+                  n = n*10 + (*q - '0');
+               else
+                  break;
+               ++q;
+            }
+            lexer->int_number = n;
+            #endif
+            return stb__clex_parse_suffixes(lexer, CLEX_intlit, p,q, STB_C_LEX_OCTAL_SUFFIXES);
+         }
+         #endif // STB__clex_decimal_ints
+         goto single_char;
+   }
+}
+#endif // STB_C_LEXER_IMPLEMENTATION
+
+#ifdef STB_C_LEXER_SELF_TEST
+#define _CRT_SECURE_NO_WARNINGS
+#include <stdio.h>
+#include <stdlib.h>
+
+static void print_token(stb_lexer *lexer)
+{
+   switch (lexer->token) {
+      case CLEX_id        : printf("_%s", lexer->string); break;
+      case CLEX_eq        : printf("=="); break;
+      case CLEX_noteq     : printf("!="); break;
+      case CLEX_lesseq    : printf("<="); break;
+      case CLEX_greatereq : printf(">="); break;
+      case CLEX_andand    : printf("&&"); break;
+      case CLEX_oror      : printf("||"); break;
+      case CLEX_shl       : printf("<<"); break;
+      case CLEX_shr       : printf(">>"); break;
+      case CLEX_plusplus  : printf("++"); break;
+      case CLEX_minusminus: printf("--"); break;
+      case CLEX_arrow     : printf("->"); break;
+      case CLEX_andeq     : printf("&="); break;
+      case CLEX_oreq      : printf("|="); break;
+      case CLEX_xoreq     : printf("^="); break;
+      case CLEX_pluseq    : printf("+="); break;
+      case CLEX_minuseq   : printf("-="); break;
+      case CLEX_muleq     : printf("*="); break;
+      case CLEX_diveq     : printf("/="); break;
+      case CLEX_modeq     : printf("%%="); break;
+      case CLEX_shleq     : printf("<<="); break;
+      case CLEX_shreq     : printf(">>="); break;
+      case CLEX_eqarrow   : printf("=>"); break;
+      case CLEX_dqstring  : printf("\"%s\"", lexer->string); break;
+      case CLEX_sqstring  : printf("'\"%s\"'", lexer->string); break;
+      case CLEX_charlit   : printf("'%s'", lexer->string); break;
+      #if defined(STB__clex_int_as_double) && !defined(STB__CLEX_use_stdlib)
+      case CLEX_intlit    : printf("#%g", lexer->real_number); break;
+      #else
+      case CLEX_intlit    : printf("#%ld", lexer->int_number); break;
+      #endif
+      case CLEX_floatlit  : printf("%g", lexer->real_number); break;
+      default:
+         if (lexer->token >= 0 && lexer->token < 256)
+            printf("%c", (int) lexer->token);
+         else {
+            printf("<<<UNKNOWN TOKEN %ld >>>\n", lexer->token);
+         }
+         break;
+   }
+}
+
+/* Force a test
+of parsing
+multiline comments */
+
+/*/ comment /*/
+/**/ extern /**/
+
+void dummy(void)
+{
+   double some_floats[] = {
+      1.0501, -10.4e12, 5E+10,
+#if 0   // not supported in C++ or C-pre-99, so don't try to compile it, but let our parser test it
+      0x1.0p+24, 0xff.FP-8, 0x1p-23,
+#endif
+      4.
+   };
+   (void) sizeof(some_floats);
+   (void) some_floats[1];
+
+   printf("test %d",1); // https://github.com/nothings/stb/issues/13
+}
+
+int main(int argc, char **argv)
+{
+   FILE *f = fopen("stb_c_lexer.h","rb");
+   char *text = (char *) malloc(1 << 20);
+   int len = f ? (int) fread(text, 1, 1<<20, f) : -1;
+   stb_lexer lex;
+   if (len < 0) {
+      fprintf(stderr, "Error opening file\n");
+      free(text);
+      fclose(f);
+      return 1;
+   }
+   fclose(f);
+
+   stb_c_lexer_init(&lex, text, text+len, (char *) malloc(0x10000), 0x10000);
+   while (stb_c_lexer_get_token(&lex)) {
+      if (lex.token == CLEX_parse_error) {
+         printf("\n<<<PARSE ERROR>>>\n");
+         break;
+      }
+      print_token(&lex);
+      printf("  ");
+   }
+   return 0;
+}
+#endif
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
--- /dev/null
+++ b/include-demo/stb_connected_components.h
@@ -1,0 +1,1049 @@
+// stb_connected_components - v0.96 - public domain connected components on grids
+//                                                 http://github.com/nothings/stb
+//
+// Finds connected components on 2D grids for testing reachability between
+// two points, with fast updates when changing reachability (e.g. on one machine
+// it was typically 0.2ms w/ 1024x1024 grid). Each grid square must be "open" or
+// "closed" (traversable or untraversable), and grid squares are only connected
+// to their orthogonal neighbors, not diagonally.
+//
+// In one source file, create the implementation by doing something like this:
+//
+//   #define STBCC_GRID_COUNT_X_LOG2    10
+//   #define STBCC_GRID_COUNT_Y_LOG2    10
+//   #define STB_CONNECTED_COMPONENTS_IMPLEMENTATION
+//   #include "stb_connected_components.h"
+//
+// The above creates an implementation that can run on maps up to 1024x1024.
+// Map sizes must be a multiple of (1<<(LOG2/2)) on each axis (e.g. 32 if LOG2=10,
+// 16 if LOG2=8, etc.) (You can just pad your map with untraversable space.)
+//
+// MEMORY USAGE
+//
+//   Uses about 6-7 bytes per grid square (e.g. 7MB for a 1024x1024 grid).
+//   Uses a single worst-case allocation which you pass in.
+//
+// PERFORMANCE
+//
+//   On a core i7-2700K at 3.5 Ghz, for a particular 1024x1024 map (map_03.png):
+//
+//       Creating map                   : 44.85 ms
+//       Making one square   traversable:  0.27 ms    (average over 29,448 calls)
+//       Making one square untraversable:  0.23 ms    (average over 30,123 calls)
+//       Reachability query:               0.00001 ms (average over 4,000,000 calls)
+//
+//   On non-degenerate maps update time is O(N^0.5), but on degenerate maps like
+//   checkerboards or 50% random, update time is O(N^0.75) (~2ms on above machine).
+//
+// CHANGELOG
+//
+//    0.96  (2019-03-04)  Fix warnings
+//    0.95  (2016-10-16)  Bugfix if multiple clumps in one cluster connect to same clump in another
+//    0.94  (2016-04-17)  Bugfix & optimize worst case (checkerboard & random)
+//    0.93  (2016-04-16)  Reduce memory by 10x for 1Kx1K map; small speedup
+//    0.92  (2016-04-16)  Compute sqrt(N) cluster size by default
+//    0.91  (2016-04-15)  Initial release
+//
+// TODO:
+//    - better API documentation
+//    - more comments
+//    - try re-integrating naive algorithm & compare performance
+//    - more optimized batching (current approach still recomputes local clumps many times)
+//    - function for setting a grid of squares at once (just use batching)
+//
+// LICENSE
+//
+//   See end of file for license information.
+//
+// ALGORITHM
+//
+//   The NxN grid map is split into sqrt(N) x sqrt(N) blocks called
+//  "clusters". Each cluster independently computes a set of connected
+//   components within that cluster (ignoring all connectivity out of
+//   that cluster) using a union-find disjoint set forest. This produces a bunch
+//   of locally connected components called "clumps". Each clump is (a) connected
+//   within its cluster, (b) does not directly connect to any other clumps in the
+//   cluster (though it may connect to them by paths that lead outside the cluster,
+//   but those are ignored at this step), and (c) maintains an adjacency list of
+//   all clumps in adjacent clusters that it _is_ connected to. Then a second
+//   union-find disjoint set forest is used to compute connected clumps
+//   globally, across the whole map. Reachability is then computed by
+//   finding which clump each input point belongs to, and checking whether
+//   those clumps are in the same "global" connected component.
+//
+//   The above data structure can be updated efficiently; on a change
+//   of a single grid square on the map, only one cluster changes its
+//   purely-local state, so only one cluster needs its clumps fully
+//   recomputed. Clumps in adjacent clusters need their adjacency lists
+//   updated: first to remove all references to the old clumps in the
+//   rebuilt cluster, then to add new references to the new clumps. Both
+//   of these operations can use the existing "find which clump each input
+//   point belongs to" query to compute that adjacency information rapidly.
+
+#ifndef INCLUDE_STB_CONNECTED_COMPONENTS_H
+#define INCLUDE_STB_CONNECTED_COMPONENTS_H
+
+#include <stdlib.h>
+
+typedef struct st_stbcc_grid stbcc_grid;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//////////////////////////////////////////////////////////////////////////////////////////
+//
+//  initialization
+//
+
+// you allocate the grid data structure to this size (note that it will be very big!!!)
+extern size_t stbcc_grid_sizeof(void);
+
+// initialize the grid, value of map[] is 0 = traversable, non-0 is solid
+extern void stbcc_init_grid(stbcc_grid *g, unsigned char *map, int w, int h);
+
+
+//////////////////////////////////////////////////////////////////////////////////////////
+//
+//  main functionality
+//
+
+// update a grid square state, 0 = traversable, non-0 is solid
+// i can add a batch-update if it's needed
+extern void stbcc_update_grid(stbcc_grid *g, int x, int y, int solid);
+
+// query if two grid squares are reachable from each other
+extern int stbcc_query_grid_node_connection(stbcc_grid *g, int x1, int y1, int x2, int y2);
+
+
+//////////////////////////////////////////////////////////////////////////////////////////
+//
+//  bonus functions
+//
+
+// wrap multiple stbcc_update_grid calls in these function to compute
+// multiple updates more efficiently; cannot make queries inside batch
+extern void stbcc_update_batch_begin(stbcc_grid *g);
+extern void stbcc_update_batch_end(stbcc_grid *g);
+
+// query the grid data structure for whether a given square is open or not
+extern int stbcc_query_grid_open(stbcc_grid *g, int x, int y);
+
+// get a unique id for the connected component this is in; it's not necessarily
+// small, you'll need a hash table or something to remap it (or just use
+extern unsigned int stbcc_get_unique_id(stbcc_grid *g, int x, int y);
+#define STBCC_NULL_UNIQUE_ID 0xffffffff // returned for closed map squares
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // INCLUDE_STB_CONNECTED_COMPONENTS_H
+
+#ifdef STB_CONNECTED_COMPONENTS_IMPLEMENTATION
+
+#include <assert.h>
+#include <string.h> // memset
+
+#if !defined(STBCC_GRID_COUNT_X_LOG2) || !defined(STBCC_GRID_COUNT_Y_LOG2)
+   #error "You must define STBCC_GRID_COUNT_X_LOG2 and STBCC_GRID_COUNT_Y_LOG2 to define the max grid supported."
+#endif
+
+#define STBCC__GRID_COUNT_X (1 << STBCC_GRID_COUNT_X_LOG2)
+#define STBCC__GRID_COUNT_Y (1 << STBCC_GRID_COUNT_Y_LOG2)
+
+#define STBCC__MAP_STRIDE   (1 << (STBCC_GRID_COUNT_X_LOG2-3))
+
+#ifndef STBCC_CLUSTER_SIZE_X_LOG2
+   #define STBCC_CLUSTER_SIZE_X_LOG2   (STBCC_GRID_COUNT_X_LOG2/2) // log2(sqrt(2^N)) = 1/2 * log2(2^N)) = 1/2 * N
+   #if STBCC_CLUSTER_SIZE_X_LOG2 > 6
+   #undef STBCC_CLUSTER_SIZE_X_LOG2
+   #define STBCC_CLUSTER_SIZE_X_LOG2 6
+   #endif
+#endif
+
+#ifndef STBCC_CLUSTER_SIZE_Y_LOG2
+   #define STBCC_CLUSTER_SIZE_Y_LOG2   (STBCC_GRID_COUNT_Y_LOG2/2)
+   #if STBCC_CLUSTER_SIZE_Y_LOG2 > 6
+   #undef STBCC_CLUSTER_SIZE_Y_LOG2
+   #define STBCC_CLUSTER_SIZE_Y_LOG2 6
+   #endif
+#endif
+
+#define STBCC__CLUSTER_SIZE_X   (1 << STBCC_CLUSTER_SIZE_X_LOG2)
+#define STBCC__CLUSTER_SIZE_Y   (1 << STBCC_CLUSTER_SIZE_Y_LOG2)
+
+#define STBCC__CLUSTER_COUNT_X_LOG2   (STBCC_GRID_COUNT_X_LOG2 - STBCC_CLUSTER_SIZE_X_LOG2)
+#define STBCC__CLUSTER_COUNT_Y_LOG2   (STBCC_GRID_COUNT_Y_LOG2 - STBCC_CLUSTER_SIZE_Y_LOG2)
+
+#define STBCC__CLUSTER_COUNT_X  (1 << STBCC__CLUSTER_COUNT_X_LOG2)
+#define STBCC__CLUSTER_COUNT_Y  (1 << STBCC__CLUSTER_COUNT_Y_LOG2)
+
+#if STBCC__CLUSTER_SIZE_X >= STBCC__GRID_COUNT_X || STBCC__CLUSTER_SIZE_Y >= STBCC__GRID_COUNT_Y
+   #error "STBCC_CLUSTER_SIZE_X/Y_LOG2 must be smaller than STBCC_GRID_COUNT_X/Y_LOG2"
+#endif
+
+// worst case # of clumps per cluster
+#define STBCC__MAX_CLUMPS_PER_CLUSTER_LOG2   (STBCC_CLUSTER_SIZE_X_LOG2 + STBCC_CLUSTER_SIZE_Y_LOG2-1)
+#define STBCC__MAX_CLUMPS_PER_CLUSTER        (1 << STBCC__MAX_CLUMPS_PER_CLUSTER_LOG2)
+#define STBCC__MAX_CLUMPS                    (STBCC__MAX_CLUMPS_PER_CLUSTER * STBCC__CLUSTER_COUNT_X * STBCC__CLUSTER_COUNT_Y)
+#define STBCC__NULL_CLUMPID                  STBCC__MAX_CLUMPS_PER_CLUSTER
+
+#define STBCC__CLUSTER_X_FOR_COORD_X(x)  ((x) >> STBCC_CLUSTER_SIZE_X_LOG2)
+#define STBCC__CLUSTER_Y_FOR_COORD_Y(y)  ((y) >> STBCC_CLUSTER_SIZE_Y_LOG2)
+
+#define STBCC__MAP_BYTE_MASK(x,y)       (1 << ((x) & 7))
+#define STBCC__MAP_BYTE(g,x,y)          ((g)->map[y][(x) >> 3])
+#define STBCC__MAP_OPEN(g,x,y)          (STBCC__MAP_BYTE(g,x,y) & STBCC__MAP_BYTE_MASK(x,y))
+
+typedef unsigned short stbcc__clumpid;
+typedef unsigned char stbcc__verify_max_clumps[STBCC__MAX_CLUMPS_PER_CLUSTER < (1 << (8*sizeof(stbcc__clumpid))) ? 1 : -1];
+
+#define STBCC__MAX_EXITS_PER_CLUSTER   (STBCC__CLUSTER_SIZE_X + STBCC__CLUSTER_SIZE_Y)   // 64 for 32x32
+#define STBCC__MAX_EXITS_PER_CLUMP     (STBCC__CLUSTER_SIZE_X + STBCC__CLUSTER_SIZE_Y)   // 64 for 32x32
+#define STBCC__MAX_EDGE_CLUMPS_PER_CLUSTER  (STBCC__MAX_EXITS_PER_CLUMP)
+
+// 2^19 * 2^6 => 2^25 exits => 2^26  => 64MB for 1024x1024
+
+// Logic for above on 4x4 grid:
+//
+// Many clumps:      One clump:
+//   + +               +  +
+//  +X.X.             +XX.X+
+//   .X.X+             .XXX
+//  +X.X.              XXX.
+//   .X.X+            +X.XX+
+//    + +              +  +
+//
+// 8 exits either way
+
+typedef unsigned char stbcc__verify_max_exits[STBCC__MAX_EXITS_PER_CLUMP <= 256];
+
+typedef struct
+{
+   unsigned short clump_index:12;
+     signed short cluster_dx:2;
+     signed short cluster_dy:2;
+} stbcc__relative_clumpid;
+
+typedef union
+{
+   struct {
+      unsigned int clump_index:12;
+      unsigned int cluster_x:10;
+      unsigned int cluster_y:10;
+   } f;
+   unsigned int c;
+} stbcc__global_clumpid;
+
+// rebuilt cluster 3,4
+
+// what changes in cluster 2,4
+
+typedef struct
+{
+   stbcc__global_clumpid global_label;        // 4
+   unsigned char num_adjacent;                // 1
+   unsigned char max_adjacent;                // 1
+   unsigned char adjacent_clump_list_index;   // 1
+   unsigned char reserved;
+} stbcc__clump; // 8
+
+#define STBCC__CLUSTER_ADJACENCY_COUNT   (STBCC__MAX_EXITS_PER_CLUSTER*2)
+typedef struct
+{
+   short num_clumps;
+   unsigned char num_edge_clumps;
+   unsigned char rebuild_adjacency;
+   stbcc__clump clump[STBCC__MAX_CLUMPS_PER_CLUSTER];       // 8 * 2^9 = 4KB
+   stbcc__relative_clumpid adjacency_storage[STBCC__CLUSTER_ADJACENCY_COUNT]; // 256 bytes
+} stbcc__cluster;
+
+struct st_stbcc_grid
+{
+   int w,h,cw,ch;
+   int in_batched_update;
+   //unsigned char cluster_dirty[STBCC__CLUSTER_COUNT_Y][STBCC__CLUSTER_COUNT_X]; // could bitpack, but: 1K x 1K => 1KB
+   unsigned char map[STBCC__GRID_COUNT_Y][STBCC__MAP_STRIDE]; // 1K x 1K => 1K x 128 => 128KB
+   stbcc__clumpid clump_for_node[STBCC__GRID_COUNT_Y][STBCC__GRID_COUNT_X];  // 1K x 1K x 2 = 2MB
+   stbcc__cluster cluster[STBCC__CLUSTER_COUNT_Y][STBCC__CLUSTER_COUNT_X]; //  1K x 4.5KB = 4.5MB
+};
+
+int stbcc_query_grid_node_connection(stbcc_grid *g, int x1, int y1, int x2, int y2)
+{
+   stbcc__global_clumpid label1, label2;
+   stbcc__clumpid c1 = g->clump_for_node[y1][x1];
+   stbcc__clumpid c2 = g->clump_for_node[y2][x2];
+   int cx1 = STBCC__CLUSTER_X_FOR_COORD_X(x1);
+   int cy1 = STBCC__CLUSTER_Y_FOR_COORD_Y(y1);
+   int cx2 = STBCC__CLUSTER_X_FOR_COORD_X(x2);
+   int cy2 = STBCC__CLUSTER_Y_FOR_COORD_Y(y2);
+   assert(!g->in_batched_update);
+   if (c1 == STBCC__NULL_CLUMPID || c2 == STBCC__NULL_CLUMPID)
+      return 0;
+   label1 = g->cluster[cy1][cx1].clump[c1].global_label;
+   label2 = g->cluster[cy2][cx2].clump[c2].global_label;
+   if (label1.c == label2.c)
+      return 1;
+   return 0;
+}
+
+int stbcc_query_grid_open(stbcc_grid *g, int x, int y)
+{
+   return STBCC__MAP_OPEN(g, x, y) != 0;
+}
+
+unsigned int stbcc_get_unique_id(stbcc_grid *g, int x, int y)
+{
+   stbcc__clumpid c = g->clump_for_node[y][x];
+   int cx = STBCC__CLUSTER_X_FOR_COORD_X(x);
+   int cy = STBCC__CLUSTER_Y_FOR_COORD_Y(y);
+   assert(!g->in_batched_update);
+   if (c == STBCC__NULL_CLUMPID) return STBCC_NULL_UNIQUE_ID;
+   return g->cluster[cy][cx].clump[c].global_label.c;
+}
+
+typedef struct
+{
+   unsigned char x,y;
+} stbcc__tinypoint;
+
+typedef struct
+{
+   stbcc__tinypoint parent[STBCC__CLUSTER_SIZE_Y][STBCC__CLUSTER_SIZE_X]; // 32x32 => 2KB
+   stbcc__clumpid   label[STBCC__CLUSTER_SIZE_Y][STBCC__CLUSTER_SIZE_X];
+} stbcc__cluster_build_info;
+
+static void stbcc__build_clumps_for_cluster(stbcc_grid *g, int cx, int cy);
+static void stbcc__remove_connections_to_adjacent_cluster(stbcc_grid *g, int cx, int cy, int dx, int dy);
+static void stbcc__add_connections_to_adjacent_cluster(stbcc_grid *g, int cx, int cy, int dx, int dy);
+
+static stbcc__global_clumpid stbcc__clump_find(stbcc_grid *g, stbcc__global_clumpid n)
+{
+   stbcc__global_clumpid q;
+   stbcc__clump *c = &g->cluster[n.f.cluster_y][n.f.cluster_x].clump[n.f.clump_index];
+
+   if (c->global_label.c == n.c)
+      return n;
+
+   q = stbcc__clump_find(g, c->global_label);
+   c->global_label = q;
+   return q;
+}
+
+typedef struct
+{
+   unsigned int cluster_x;
+   unsigned int cluster_y;
+   unsigned int clump_index;
+} stbcc__unpacked_clumpid;
+
+static void stbcc__clump_union(stbcc_grid *g, stbcc__unpacked_clumpid m, int x, int y, int idx)
+{
+   stbcc__clump *mc = &g->cluster[m.cluster_y][m.cluster_x].clump[m.clump_index];
+   stbcc__clump *nc = &g->cluster[y][x].clump[idx];
+   stbcc__global_clumpid mp = stbcc__clump_find(g, mc->global_label);
+   stbcc__global_clumpid np = stbcc__clump_find(g, nc->global_label);
+
+   if (mp.c == np.c)
+      return;
+
+   g->cluster[mp.f.cluster_y][mp.f.cluster_x].clump[mp.f.clump_index].global_label = np;
+}
+
+static void stbcc__build_connected_components_for_clumps(stbcc_grid *g)
+{
+   int i,j,k,h;
+
+   for (j=0; j < STBCC__CLUSTER_COUNT_Y; ++j) {
+      for (i=0; i < STBCC__CLUSTER_COUNT_X; ++i) {
+         stbcc__cluster *cluster = &g->cluster[j][i];
+         for (k=0; k < (int) cluster->num_edge_clumps; ++k) {
+            stbcc__global_clumpid m;
+            m.f.clump_index = k;
+            m.f.cluster_x = i;
+            m.f.cluster_y = j;
+            assert((int) m.f.clump_index == k && (int) m.f.cluster_x == i && (int) m.f.cluster_y == j);
+            cluster->clump[k].global_label = m;
+         }
+      }
+   }
+
+   for (j=0; j < STBCC__CLUSTER_COUNT_Y; ++j) {
+      for (i=0; i < STBCC__CLUSTER_COUNT_X; ++i) {
+         stbcc__cluster *cluster = &g->cluster[j][i];
+         for (k=0; k < (int) cluster->num_edge_clumps; ++k) {
+            stbcc__clump *clump = &cluster->clump[k];
+            stbcc__unpacked_clumpid m;
+            stbcc__relative_clumpid *adj;
+            m.clump_index = k;
+            m.cluster_x = i;
+            m.cluster_y = j;
+            adj = &cluster->adjacency_storage[clump->adjacent_clump_list_index];
+            for (h=0; h < clump->num_adjacent; ++h) {
+               unsigned int clump_index = adj[h].clump_index;
+               unsigned int x = adj[h].cluster_dx + i;
+               unsigned int y = adj[h].cluster_dy + j;
+               stbcc__clump_union(g, m, x, y, clump_index);
+            }
+         }
+      }
+   }
+
+   for (j=0; j < STBCC__CLUSTER_COUNT_Y; ++j) {
+      for (i=0; i < STBCC__CLUSTER_COUNT_X; ++i) {
+         stbcc__cluster *cluster = &g->cluster[j][i];
+         for (k=0; k < (int) cluster->num_edge_clumps; ++k) {
+            stbcc__global_clumpid m;
+            m.f.clump_index = k;
+            m.f.cluster_x = i;
+            m.f.cluster_y = j;
+            stbcc__clump_find(g, m);
+         }
+      }
+   }
+}
+
+static void stbcc__build_all_connections_for_cluster(stbcc_grid *g, int cx, int cy)
+{
+   // in this particular case, we are fully non-incremental. that means we
+   // can discover the correct sizes for the arrays, but requires we build
+   // the data into temporary data structures, or just count the sizes, so
+   // for simplicity we do the latter
+   stbcc__cluster *cluster = &g->cluster[cy][cx];
+   unsigned char connected[STBCC__MAX_EDGE_CLUMPS_PER_CLUSTER][STBCC__MAX_EDGE_CLUMPS_PER_CLUSTER/8]; // 64 x 8 => 1KB
+   unsigned char num_adj[STBCC__MAX_CLUMPS_PER_CLUSTER] = { 0 };
+   int x = cx * STBCC__CLUSTER_SIZE_X;
+   int y = cy * STBCC__CLUSTER_SIZE_Y;
+   int step_x, step_y=0, i, j, k, n, m, dx, dy, total;
+   int extra;
+
+   g->cluster[cy][cx].rebuild_adjacency = 0;
+
+   total = 0;
+   for (m=0; m < 4; ++m) {
+      switch (m) {
+         case 0:
+            dx = 1, dy = 0;
+            step_x = 0, step_y = 1;
+            i = STBCC__CLUSTER_SIZE_X-1;
+            j = 0;
+            n = STBCC__CLUSTER_SIZE_Y;
+            break;
+         case 1:
+            dx = -1, dy = 0;
+            i = 0;
+            j = 0;
+            step_x = 0;
+            step_y = 1;
+            n = STBCC__CLUSTER_SIZE_Y;
+            break;
+         case 2:
+            dy = -1, dx = 0;
+            i = 0;
+            j = 0;
+            step_x = 1;
+            step_y = 0;
+            n = STBCC__CLUSTER_SIZE_X;
+            break;
+         case 3:
+            dy = 1, dx = 0;
+            i = 0;
+            j = STBCC__CLUSTER_SIZE_Y-1;
+            step_x = 1;
+            step_y = 0;
+            n = STBCC__CLUSTER_SIZE_X;
+            break;
+      }
+
+      if (cx+dx < 0 || cx+dx >= g->cw || cy+dy < 0 || cy+dy >= g->ch)
+         continue;
+
+      memset(connected, 0, sizeof(connected));
+      for (k=0; k < n; ++k) {
+         if (STBCC__MAP_OPEN(g, x+i, y+j) && STBCC__MAP_OPEN(g, x+i+dx, y+j+dy)) {
+            stbcc__clumpid src = g->clump_for_node[y+j][x+i];
+            stbcc__clumpid dest = g->clump_for_node[y+j+dy][x+i+dx];
+            if (0 == (connected[src][dest>>3] & (1 << (dest & 7)))) {
+               connected[src][dest>>3] |= 1 << (dest & 7);
+               ++num_adj[src];
+               ++total;
+            }
+         }
+         i += step_x;
+         j += step_y;
+      }
+   }
+
+   assert(total <= STBCC__CLUSTER_ADJACENCY_COUNT);
+
+   // decide how to apportion unused adjacency slots; only clumps that lie
+   // on the edges of the cluster need adjacency slots, so divide them up
+   // evenly between those clumps
+
+   // we want:
+   //    extra = (STBCC__CLUSTER_ADJACENCY_COUNT - total) / cluster->num_edge_clumps;
+   // but we efficiently approximate this without a divide, because
+   // ignoring edge-vs-non-edge with 'num_adj[i]*2' was faster than
+   // 'num_adj[i]+extra' with the divide
+   if      (total + (cluster->num_edge_clumps<<2) <= STBCC__CLUSTER_ADJACENCY_COUNT)
+      extra = 4;
+   else if (total + (cluster->num_edge_clumps<<1) <= STBCC__CLUSTER_ADJACENCY_COUNT)
+      extra = 2;
+   else if (total + (cluster->num_edge_clumps<<0) <= STBCC__CLUSTER_ADJACENCY_COUNT)
+      extra = 1;
+   else
+      extra = 0;
+
+   total = 0;
+   for (i=0; i < (int) cluster->num_edge_clumps; ++i) {
+      int alloc = num_adj[i]+extra;
+      if (alloc > STBCC__MAX_EXITS_PER_CLUSTER)
+         alloc = STBCC__MAX_EXITS_PER_CLUSTER;
+      assert(total < 256); // must fit in byte
+      cluster->clump[i].adjacent_clump_list_index = (unsigned char) total;
+      cluster->clump[i].max_adjacent = alloc;
+      cluster->clump[i].num_adjacent = 0;
+      total += alloc;
+   }
+   assert(total <= STBCC__CLUSTER_ADJACENCY_COUNT);
+
+   stbcc__add_connections_to_adjacent_cluster(g, cx, cy, -1, 0);
+   stbcc__add_connections_to_adjacent_cluster(g, cx, cy,  1, 0);
+   stbcc__add_connections_to_adjacent_cluster(g, cx, cy,  0,-1);
+   stbcc__add_connections_to_adjacent_cluster(g, cx, cy,  0, 1);
+   // make sure all of the above succeeded.
+   assert(g->cluster[cy][cx].rebuild_adjacency == 0);
+}
+
+static void stbcc__add_connections_to_adjacent_cluster_with_rebuild(stbcc_grid *g, int cx, int cy, int dx, int dy)
+{
+   if (cx >= 0 && cx < g->cw && cy >= 0 && cy < g->ch) {
+      stbcc__add_connections_to_adjacent_cluster(g, cx, cy, dx, dy);
+      if (g->cluster[cy][cx].rebuild_adjacency)
+         stbcc__build_all_connections_for_cluster(g, cx, cy);
+   }
+}
+
+void stbcc_update_grid(stbcc_grid *g, int x, int y, int solid)
+{
+   int cx,cy;
+
+   if (!solid) {
+      if (STBCC__MAP_OPEN(g,x,y))
+         return;
+   } else {
+      if (!STBCC__MAP_OPEN(g,x,y))
+         return;
+   }
+
+   cx = STBCC__CLUSTER_X_FOR_COORD_X(x);
+   cy = STBCC__CLUSTER_Y_FOR_COORD_Y(y);
+
+   stbcc__remove_connections_to_adjacent_cluster(g, cx-1, cy,  1, 0);
+   stbcc__remove_connections_to_adjacent_cluster(g, cx+1, cy, -1, 0);
+   stbcc__remove_connections_to_adjacent_cluster(g, cx, cy-1,  0, 1);
+   stbcc__remove_connections_to_adjacent_cluster(g, cx, cy+1,  0,-1);
+
+   if (!solid)
+      STBCC__MAP_BYTE(g,x,y) |= STBCC__MAP_BYTE_MASK(x,y);
+   else
+      STBCC__MAP_BYTE(g,x,y) &= ~STBCC__MAP_BYTE_MASK(x,y);
+
+   stbcc__build_clumps_for_cluster(g, cx, cy);
+   stbcc__build_all_connections_for_cluster(g, cx, cy);
+
+   stbcc__add_connections_to_adjacent_cluster_with_rebuild(g, cx-1, cy,  1, 0);
+   stbcc__add_connections_to_adjacent_cluster_with_rebuild(g, cx+1, cy, -1, 0);
+   stbcc__add_connections_to_adjacent_cluster_with_rebuild(g, cx, cy-1,  0, 1);
+   stbcc__add_connections_to_adjacent_cluster_with_rebuild(g, cx, cy+1,  0,-1);
+
+   if (!g->in_batched_update)
+      stbcc__build_connected_components_for_clumps(g);
+   #if 0
+   else
+      g->cluster_dirty[cy][cx] = 1;
+   #endif
+}
+
+void stbcc_update_batch_begin(stbcc_grid *g)
+{
+   assert(!g->in_batched_update);
+   g->in_batched_update = 1;
+}
+
+void stbcc_update_batch_end(stbcc_grid *g)
+{
+   assert(g->in_batched_update);
+   g->in_batched_update =  0;
+   stbcc__build_connected_components_for_clumps(g); // @OPTIMIZE: only do this if update was non-empty
+}
+
+size_t stbcc_grid_sizeof(void)
+{
+   return sizeof(stbcc_grid);
+}
+
+void stbcc_init_grid(stbcc_grid *g, unsigned char *map, int w, int h)
+{
+   int i,j,k;
+   assert(w % STBCC__CLUSTER_SIZE_X == 0);
+   assert(h % STBCC__CLUSTER_SIZE_Y == 0);
+   assert(w % 8 == 0);
+
+   g->w = w;
+   g->h = h;
+   g->cw = w >> STBCC_CLUSTER_SIZE_X_LOG2;
+   g->ch = h >> STBCC_CLUSTER_SIZE_Y_LOG2;
+   g->in_batched_update = 0;
+
+   #if 0
+   for (j=0; j < STBCC__CLUSTER_COUNT_Y; ++j)
+      for (i=0; i < STBCC__CLUSTER_COUNT_X; ++i)
+         g->cluster_dirty[j][i] = 0;
+   #endif
+
+   for (j=0; j < h; ++j) {
+      for (i=0; i < w; i += 8) {
+         unsigned char c = 0;
+         for (k=0; k < 8; ++k)
+            if (map[j*w + (i+k)] == 0)
+               c |= (1 << k);
+         g->map[j][i>>3] = c;
+      }
+   }
+
+   for (j=0; j < g->ch; ++j)
+      for (i=0; i < g->cw; ++i)
+         stbcc__build_clumps_for_cluster(g, i, j);
+
+   for (j=0; j < g->ch; ++j)
+      for (i=0; i < g->cw; ++i)
+         stbcc__build_all_connections_for_cluster(g, i, j);
+
+   stbcc__build_connected_components_for_clumps(g);
+
+   for (j=0; j < g->h; ++j)
+      for (i=0; i < g->w; ++i)
+         assert(g->clump_for_node[j][i] <= STBCC__NULL_CLUMPID);
+}
+
+
+static void stbcc__add_clump_connection(stbcc_grid *g, int x1, int y1, int x2, int y2)
+{
+   stbcc__cluster *cluster;
+   stbcc__clump *clump;
+
+   int cx1 = STBCC__CLUSTER_X_FOR_COORD_X(x1);
+   int cy1 = STBCC__CLUSTER_Y_FOR_COORD_Y(y1);
+   int cx2 = STBCC__CLUSTER_X_FOR_COORD_X(x2);
+   int cy2 = STBCC__CLUSTER_Y_FOR_COORD_Y(y2);
+
+   stbcc__clumpid c1 = g->clump_for_node[y1][x1];
+   stbcc__clumpid c2 = g->clump_for_node[y2][x2];
+
+   stbcc__relative_clumpid rc;
+
+   assert(cx1 != cx2 || cy1 != cy2);
+   assert(abs(cx1-cx2) + abs(cy1-cy2) == 1);
+
+   // add connection to c2 in c1
+
+   rc.clump_index = c2;
+   rc.cluster_dx = x2-x1;
+   rc.cluster_dy = y2-y1;
+
+   cluster = &g->cluster[cy1][cx1];
+   clump = &cluster->clump[c1];
+   assert(clump->num_adjacent <= clump->max_adjacent);
+   if (clump->num_adjacent == clump->max_adjacent)
+      g->cluster[cy1][cx1].rebuild_adjacency = 1;
+   else {
+      stbcc__relative_clumpid *adj = &cluster->adjacency_storage[clump->adjacent_clump_list_index];
+      assert(clump->num_adjacent < STBCC__MAX_EXITS_PER_CLUMP);
+      assert(clump->adjacent_clump_list_index + clump->num_adjacent <= STBCC__CLUSTER_ADJACENCY_COUNT);
+      adj[clump->num_adjacent++] = rc;
+   }
+}
+
+static void stbcc__remove_clump_connection(stbcc_grid *g, int x1, int y1, int x2, int y2)
+{
+   stbcc__cluster *cluster;
+   stbcc__clump *clump;
+   stbcc__relative_clumpid *adj;
+   int i;
+
+   int cx1 = STBCC__CLUSTER_X_FOR_COORD_X(x1);
+   int cy1 = STBCC__CLUSTER_Y_FOR_COORD_Y(y1);
+   int cx2 = STBCC__CLUSTER_X_FOR_COORD_X(x2);
+   int cy2 = STBCC__CLUSTER_Y_FOR_COORD_Y(y2);
+
+   stbcc__clumpid c1 = g->clump_for_node[y1][x1];
+   stbcc__clumpid c2 = g->clump_for_node[y2][x2];
+
+   stbcc__relative_clumpid rc;
+
+   assert(cx1 != cx2 || cy1 != cy2);
+   assert(abs(cx1-cx2) + abs(cy1-cy2) == 1);
+
+   // add connection to c2 in c1
+
+   rc.clump_index = c2;
+   rc.cluster_dx = x2-x1;
+   rc.cluster_dy = y2-y1;
+
+   cluster = &g->cluster[cy1][cx1];
+   clump = &cluster->clump[c1];
+   adj = &cluster->adjacency_storage[clump->adjacent_clump_list_index];
+
+   for (i=0; i < clump->num_adjacent; ++i)
+      if (rc.clump_index == adj[i].clump_index &&
+          rc.cluster_dx  == adj[i].cluster_dx  &&
+          rc.cluster_dy  == adj[i].cluster_dy)
+         break;
+
+   if (i < clump->num_adjacent)
+      adj[i] = adj[--clump->num_adjacent];
+   else
+      assert(0);
+}
+
+static void stbcc__add_connections_to_adjacent_cluster(stbcc_grid *g, int cx, int cy, int dx, int dy)
+{
+   unsigned char connected[STBCC__MAX_EDGE_CLUMPS_PER_CLUSTER][STBCC__MAX_EDGE_CLUMPS_PER_CLUSTER/8] = { { 0 } };
+   int x = cx * STBCC__CLUSTER_SIZE_X;
+   int y = cy * STBCC__CLUSTER_SIZE_Y;
+   int step_x, step_y=0, i, j, k, n;
+
+   if (cx < 0 || cx >= g->cw || cy < 0 || cy >= g->ch)
+      return;
+
+   if (cx+dx < 0 || cx+dx >= g->cw || cy+dy < 0 || cy+dy >= g->ch)
+      return;
+
+   if (g->cluster[cy][cx].rebuild_adjacency)
+      return;
+
+   assert(abs(dx) + abs(dy) == 1);
+
+   if (dx == 1) {
+      i = STBCC__CLUSTER_SIZE_X-1;
+      j = 0;
+      step_x = 0;
+      step_y = 1;
+      n = STBCC__CLUSTER_SIZE_Y;
+   } else if (dx == -1) {
+      i = 0;
+      j = 0;
+      step_x = 0;
+      step_y = 1;
+      n = STBCC__CLUSTER_SIZE_Y;
+   } else if (dy == -1) {
+      i = 0;
+      j = 0;
+      step_x = 1;
+      step_y = 0;
+      n = STBCC__CLUSTER_SIZE_X;
+   } else if (dy == 1) {
+      i = 0;
+      j = STBCC__CLUSTER_SIZE_Y-1;
+      step_x = 1;
+      step_y = 0;
+      n = STBCC__CLUSTER_SIZE_X;
+   } else {
+      assert(0);
+      return;
+   }
+
+   for (k=0; k < n; ++k) {
+      if (STBCC__MAP_OPEN(g, x+i, y+j) && STBCC__MAP_OPEN(g, x+i+dx, y+j+dy)) {
+         stbcc__clumpid src = g->clump_for_node[y+j][x+i];
+         stbcc__clumpid dest = g->clump_for_node[y+j+dy][x+i+dx];
+         if (0 == (connected[src][dest>>3] & (1 << (dest & 7)))) {
+            assert((dest>>3) < sizeof(connected));
+            connected[src][dest>>3] |= 1 << (dest & 7);
+            stbcc__add_clump_connection(g, x+i, y+j, x+i+dx, y+j+dy);
+            if (g->cluster[cy][cx].rebuild_adjacency)
+               break;
+         }
+      }
+      i += step_x;
+      j += step_y;
+   }
+}
+
+static void stbcc__remove_connections_to_adjacent_cluster(stbcc_grid *g, int cx, int cy, int dx, int dy)
+{
+   unsigned char disconnected[STBCC__MAX_EDGE_CLUMPS_PER_CLUSTER][STBCC__MAX_EDGE_CLUMPS_PER_CLUSTER/8] = { { 0 } };
+   int x = cx * STBCC__CLUSTER_SIZE_X;
+   int y = cy * STBCC__CLUSTER_SIZE_Y;
+   int step_x, step_y=0, i, j, k, n;
+
+   if (cx < 0 || cx >= g->cw || cy < 0 || cy >= g->ch)
+      return;
+
+   if (cx+dx < 0 || cx+dx >= g->cw || cy+dy < 0 || cy+dy >= g->ch)
+      return;
+
+   assert(abs(dx) + abs(dy) == 1);
+
+   if (dx == 1) {
+      i = STBCC__CLUSTER_SIZE_X-1;
+      j = 0;
+      step_x = 0;
+      step_y = 1;
+      n = STBCC__CLUSTER_SIZE_Y;
+   } else if (dx == -1) {
+      i = 0;
+      j = 0;
+      step_x = 0;
+      step_y = 1;
+      n = STBCC__CLUSTER_SIZE_Y;
+   } else if (dy == -1) {
+      i = 0;
+      j = 0;
+      step_x = 1;
+      step_y = 0;
+      n = STBCC__CLUSTER_SIZE_X;
+   } else if (dy == 1) {
+      i = 0;
+      j = STBCC__CLUSTER_SIZE_Y-1;
+      step_x = 1;
+      step_y = 0;
+      n = STBCC__CLUSTER_SIZE_X;
+   } else {
+      assert(0);
+      return;
+   }
+
+   for (k=0; k < n; ++k) {
+      if (STBCC__MAP_OPEN(g, x+i, y+j) && STBCC__MAP_OPEN(g, x+i+dx, y+j+dy)) {
+         stbcc__clumpid src = g->clump_for_node[y+j][x+i];
+         stbcc__clumpid dest = g->clump_for_node[y+j+dy][x+i+dx];
+         if (0 == (disconnected[src][dest>>3] & (1 << (dest & 7)))) {
+            disconnected[src][dest>>3] |= 1 << (dest & 7);
+            stbcc__remove_clump_connection(g, x+i, y+j, x+i+dx, y+j+dy);
+         }
+      }
+      i += step_x;
+      j += step_y;
+   }
+}
+
+static stbcc__tinypoint stbcc__incluster_find(stbcc__cluster_build_info *cbi, int x, int y)
+{
+   stbcc__tinypoint p,q;
+   p = cbi->parent[y][x];
+   if (p.x == x && p.y == y)
+      return p;
+   q = stbcc__incluster_find(cbi, p.x, p.y);
+   cbi->parent[y][x] = q;
+   return q;
+}
+
+static void stbcc__incluster_union(stbcc__cluster_build_info *cbi, int x1, int y1, int x2, int y2)
+{
+   stbcc__tinypoint p = stbcc__incluster_find(cbi, x1,y1);
+   stbcc__tinypoint q = stbcc__incluster_find(cbi, x2,y2);
+
+   if (p.x == q.x && p.y == q.y)
+      return;
+
+   cbi->parent[p.y][p.x] = q;
+}
+
+static void stbcc__switch_root(stbcc__cluster_build_info *cbi, int x, int y, stbcc__tinypoint p)
+{
+   cbi->parent[p.y][p.x].x = x;
+   cbi->parent[p.y][p.x].y = y;
+   cbi->parent[y][x].x = x;
+   cbi->parent[y][x].y = y;
+}
+
+static void stbcc__build_clumps_for_cluster(stbcc_grid *g, int cx, int cy)
+{
+   stbcc__cluster *c;
+   stbcc__cluster_build_info cbi;
+   int label=0;
+   int i,j;
+   int x = cx * STBCC__CLUSTER_SIZE_X;
+   int y = cy * STBCC__CLUSTER_SIZE_Y;
+
+   // set initial disjoint set forest state
+   for (j=0; j < STBCC__CLUSTER_SIZE_Y; ++j) {
+      for (i=0; i < STBCC__CLUSTER_SIZE_X; ++i) {
+         cbi.parent[j][i].x = i;
+         cbi.parent[j][i].y = j;
+      }
+   }
+
+   // join all sets that are connected
+   for (j=0; j < STBCC__CLUSTER_SIZE_Y; ++j) {
+      // check down only if not on bottom row
+      if (j < STBCC__CLUSTER_SIZE_Y-1)
+         for (i=0; i < STBCC__CLUSTER_SIZE_X; ++i)
+            if (STBCC__MAP_OPEN(g,x+i,y+j) && STBCC__MAP_OPEN(g,x+i  ,y+j+1))
+               stbcc__incluster_union(&cbi, i,j, i,j+1);
+      // check right for everything but rightmost column
+      for (i=0; i < STBCC__CLUSTER_SIZE_X-1; ++i)
+         if (STBCC__MAP_OPEN(g,x+i,y+j) && STBCC__MAP_OPEN(g,x+i+1,y+j  ))
+            stbcc__incluster_union(&cbi, i,j, i+1,j);
+   }
+
+   // label all non-empty clumps along edges so that all edge clumps are first
+   // in list; this means in degenerate case we can skip traversing non-edge clumps.
+   // because in the first pass we only label leaders, we swap the leader to the
+   // edge first
+
+   // first put solid labels on all the edges; these will get overwritten if they're open
+   for (j=0; j < STBCC__CLUSTER_SIZE_Y; ++j)
+      cbi.label[j][0] = cbi.label[j][STBCC__CLUSTER_SIZE_X-1] = STBCC__NULL_CLUMPID;
+   for (i=0; i < STBCC__CLUSTER_SIZE_X; ++i)
+      cbi.label[0][i] = cbi.label[STBCC__CLUSTER_SIZE_Y-1][i] = STBCC__NULL_CLUMPID;
+
+   for (j=0; j < STBCC__CLUSTER_SIZE_Y; ++j) {
+      i = 0;
+      if (STBCC__MAP_OPEN(g, x+i, y+j)) {
+         stbcc__tinypoint p = stbcc__incluster_find(&cbi, i,j);
+         if (p.x == i && p.y == j)
+            // if this is the leader, give it a label
+            cbi.label[j][i] = label++;
+         else if (!(p.x == 0 || p.x == STBCC__CLUSTER_SIZE_X-1 || p.y == 0 || p.y == STBCC__CLUSTER_SIZE_Y-1)) {
+            // if leader is in interior, promote this edge node to leader and label
+            stbcc__switch_root(&cbi, i, j, p);
+            cbi.label[j][i] = label++;
+         }
+         // else if leader is on edge, do nothing (it'll get labelled when we reach it)
+      }
+      i = STBCC__CLUSTER_SIZE_X-1;
+      if (STBCC__MAP_OPEN(g, x+i, y+j)) {
+         stbcc__tinypoint p = stbcc__incluster_find(&cbi, i,j);
+         if (p.x == i && p.y == j)
+            cbi.label[j][i] = label++;
+         else if (!(p.x == 0 || p.x == STBCC__CLUSTER_SIZE_X-1 || p.y == 0 || p.y == STBCC__CLUSTER_SIZE_Y-1)) {
+            stbcc__switch_root(&cbi, i, j, p);
+            cbi.label[j][i] = label++;
+         }
+      }
+   }
+
+   for (i=1; i < STBCC__CLUSTER_SIZE_Y-1; ++i) {
+      j = 0;
+      if (STBCC__MAP_OPEN(g, x+i, y+j)) {
+         stbcc__tinypoint p = stbcc__incluster_find(&cbi, i,j);
+         if (p.x == i && p.y == j)
+            cbi.label[j][i] = label++;
+         else if (!(p.x == 0 || p.x == STBCC__CLUSTER_SIZE_X-1 || p.y == 0 || p.y == STBCC__CLUSTER_SIZE_Y-1)) {
+            stbcc__switch_root(&cbi, i, j, p);
+            cbi.label[j][i] = label++;
+         }
+      }
+      j = STBCC__CLUSTER_SIZE_Y-1;
+      if (STBCC__MAP_OPEN(g, x+i, y+j)) {
+         stbcc__tinypoint p = stbcc__incluster_find(&cbi, i,j);
+         if (p.x == i && p.y == j)
+            cbi.label[j][i] = label++;
+         else if (!(p.x == 0 || p.x == STBCC__CLUSTER_SIZE_X-1 || p.y == 0 || p.y == STBCC__CLUSTER_SIZE_Y-1)) {
+            stbcc__switch_root(&cbi, i, j, p);
+            cbi.label[j][i] = label++;
+         }
+      }
+   }
+
+   c = &g->cluster[cy][cx];
+   c->num_edge_clumps = label;
+
+   // label any internal clusters
+   for (j=1; j < STBCC__CLUSTER_SIZE_Y-1; ++j) {
+      for (i=1; i < STBCC__CLUSTER_SIZE_X-1; ++i) {
+         stbcc__tinypoint p = cbi.parent[j][i];
+         if (p.x == i && p.y == j) {
+            if (STBCC__MAP_OPEN(g,x+i,y+j))
+               cbi.label[j][i] = label++;
+            else
+               cbi.label[j][i] = STBCC__NULL_CLUMPID;
+         }
+      }
+   }
+
+   // label all other nodes
+   for (j=0; j < STBCC__CLUSTER_SIZE_Y; ++j) {
+      for (i=0; i < STBCC__CLUSTER_SIZE_X; ++i) {
+         stbcc__tinypoint p = stbcc__incluster_find(&cbi, i,j);
+         if (p.x != i || p.y != j) {
+            if (STBCC__MAP_OPEN(g,x+i,y+j))
+               cbi.label[j][i] = cbi.label[p.y][p.x];
+         }
+         if (STBCC__MAP_OPEN(g,x+i,y+j))
+            assert(cbi.label[j][i] != STBCC__NULL_CLUMPID);
+      }
+   }
+
+   c->num_clumps = label;
+
+   for (i=0; i < label; ++i) {
+      c->clump[i].num_adjacent = 0;
+      c->clump[i].max_adjacent = 0;
+   }
+
+   for (j=0; j < STBCC__CLUSTER_SIZE_Y; ++j)
+      for (i=0; i < STBCC__CLUSTER_SIZE_X; ++i) {
+         g->clump_for_node[y+j][x+i] = cbi.label[j][i]; // @OPTIMIZE: remove cbi.label entirely
+         assert(g->clump_for_node[y+j][x+i] <= STBCC__NULL_CLUMPID);
+      }
+
+   // set the global label for all interior clumps since they can't have connections,
+   // so we don't have to do this on the global pass (brings from O(N) to O(N^0.75))
+   for (i=(int) c->num_edge_clumps; i < (int) c->num_clumps; ++i) {
+      stbcc__global_clumpid gc;
+      gc.f.cluster_x = cx;
+      gc.f.cluster_y = cy;
+      gc.f.clump_index = i;
+      c->clump[i].global_label = gc;
+   }
+
+   c->rebuild_adjacency = 1; // flag that it has no valid adjacency data
+}
+
+#endif // STB_CONNECTED_COMPONENTS_IMPLEMENTATION
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
--- /dev/null
+++ b/include-demo/stb_divide.h
@@ -1,0 +1,433 @@
+// stb_divide.h - v0.94 - public domain - Sean Barrett, Feb 2010
+// Three kinds of divide/modulus of signed integers.
+//
+// HISTORY
+//
+//   v0.94              Fix integer overflow issues
+//   v0.93  2020-02-02  Write useful exit() value from main()
+//   v0.92  2019-02-25  Fix warning
+//   v0.91  2010-02-27  Fix euclidean division by INT_MIN for non-truncating C
+//                      Check result with 64-bit math to catch such cases
+//   v0.90  2010-02-24  First public release
+//
+// USAGE
+//
+// In *ONE* source file, put:
+//
+//    #define STB_DIVIDE_IMPLEMENTATION
+//    // #define C_INTEGER_DIVISION_TRUNCATES  // see Note 1
+//    // #define C_INTEGER_DIVISION_FLOORS     // see Note 2
+//    #include "stb_divide.h"
+//
+// Other source files should just include stb_divide.h
+//
+// Note 1: On platforms/compilers that you know signed C division
+// truncates, you can #define C_INTEGER_DIVISION_TRUNCATES.
+//
+// Note 2: On platforms/compilers that you know signed C division
+// floors (rounds to negative infinity), you can #define
+// C_INTEGER_DIVISION_FLOORS.
+//
+// You can #define STB_DIVIDE_TEST in which case the implementation
+// will generate a main() and compiling the result will create a
+// program that tests the implementation. Run it with no arguments
+// and any output indicates an error; run it with any argument and
+// it will also print the test results. Define STB_DIVIDE_TEST_64
+// to a 64-bit integer type to avoid overflows in the result-checking
+// which give false negatives.
+//
+// ABOUT
+//
+// This file provides three different consistent divide/mod pairs
+// implemented on top of arbitrary C/C++ division, including correct
+// handling of overflow of intermediate calculations:
+//
+//     trunc:   a/b truncates to 0,           a%b has same sign as a
+//     floor:   a/b truncates to -inf,        a%b has same sign as b
+//     eucl:    a/b truncates to sign(b)*inf, a%b is non-negative
+//
+// Not necessarily optimal; I tried to keep it generally efficient,
+// but there may be better ways.
+//
+// Briefly, for those who are not familiar with the problem, we note
+// the reason these divides exist and are interesting:
+//
+//     'trunc' is easy to implement in hardware (strip the signs,
+//          compute, reapply the signs), thus is commonly defined
+//          by many languages (including C99)
+//
+//     'floor' is simple to define and better behaved than trunc;
+//          for example it divides integers into fixed-size buckets
+//          without an extra-wide bucket at 0, and for a fixed
+//          divisor N there are only |N| possible moduli.
+//
+//     'eucl' guarantees fixed-sized buckets *and* a non-negative
+//          modulus and defines division to be whatever is needed
+//          to achieve that result.
+//
+// See "The Euclidean definition of the functions div and mod"
+// by Raymond Boute (1992), or "Division and Modulus for Computer
+// Scientists" by Daan Leijen (2001)
+//
+// We assume of the built-in C division:
+//     (a) modulus is the remainder for the corresponding division
+//     (b) a/b truncates if a and b are the same sign
+//
+// Property (a) requires (a/b)*b + (a%b)==a, and is required by C.
+// Property (b) seems to be true of all hardware but is *not* satisfied
+// by the euclidean division operator we define, so it's possibly not
+// always true. If any such platform turns up, we can add more cases.
+// (Possibly only stb_div_trunc currently relies on property (b).)
+//
+// LICENSE
+//
+//   See end of file for license information.
+
+
+#ifndef INCLUDE_STB_DIVIDE_H
+#define INCLUDE_STB_DIVIDE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int stb_div_trunc(int value_to_be_divided, int value_to_divide_by);
+extern int stb_div_floor(int value_to_be_divided, int value_to_divide_by);
+extern int stb_div_eucl (int value_to_be_divided, int value_to_divide_by);
+extern int stb_mod_trunc(int value_to_be_divided, int value_to_divide_by);
+extern int stb_mod_floor(int value_to_be_divided, int value_to_divide_by);
+extern int stb_mod_eucl (int value_to_be_divided, int value_to_divide_by);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifdef STB_DIVIDE_IMPLEMENTATION
+
+#if defined(__STDC_VERSION) && __STDC_VERSION__ >= 19901
+   #ifndef C_INTEGER_DIVISION_TRUNCATES
+      #define C_INTEGER_DIVISION_TRUNCATES
+   #endif
+#endif
+
+#ifndef INT_MIN
+#include <limits.h> // if you have no limits.h, #define INT_MIN yourself
+#endif
+
+// the following macros are designed to allow testing
+// other platforms by simulating them
+#ifndef STB_DIVIDE_TEST_FLOOR
+   #define stb__div(a,b)  ((a)/(b))
+   #define stb__mod(a,b)  ((a)%(b))
+#else
+   // implement floor-style divide on trunc platform
+   #ifndef C_INTEGER_DIVISION_TRUNCATES
+   #error "floor test requires truncating division"
+   #endif
+   #undef C_INTEGER_DIVISION_TRUNCATES
+   int stb__div(int v1, int v2)
+   {
+      int q = v1/v2, r = v1%v2;
+      if ((r > 0 && v2 < 0) || (r < 0 && v2 > 0))
+         return q-1;
+      else
+         return q;
+   }
+
+   int stb__mod(int v1, int v2)
+   {
+      int r = v1%v2;
+      if ((r > 0 && v2 < 0) || (r < 0 && v2 > 0))
+         return r+v2;
+      else
+         return r;
+   }
+#endif
+
+int stb_div_trunc(int v1, int v2)
+{
+   #ifdef C_INTEGER_DIVISION_TRUNCATES
+   return v1/v2;
+   #else
+   if (v1 >= 0 && v2 <= 0)
+      return -stb__div(-v1,v2);  // both negative to avoid overflow
+   if (v1 <= 0 && v2 >= 0)
+      if (v1 != INT_MIN)
+         return -stb__div(v1,-v2);    // both negative to avoid overflow
+      else
+         return -stb__div(v1+v2,-v2)-1; // push v1 away from wrap point
+   else
+      return v1/v2;            // same sign, so expect truncation
+   #endif
+}
+
+int stb_div_floor(int v1, int v2)
+{
+   #ifdef C_INTEGER_DIVISION_FLOORS
+   return v1/v2;
+   #else
+   if (v1 >= 0 && v2 < 0) {
+      if (v2 + 1 >= INT_MIN + v1) // check if increasing v1's magnitude overflows
+         return -stb__div((v2+1)-v1,v2); // nope, so just compute it
+      else
+         return -stb__div(-v1,v2) + ((-v1)%v2 ? -1 : 0);
+   }
+   if (v1 < 0 && v2 >= 0) {
+      if (v1 != INT_MIN) {
+         if (v1 + 1 >= INT_MIN + v2) // check if increasing v1's magnitude overflows
+            return -stb__div((v1+1)-v2,-v2); // nope, so just compute it
+         else
+            return -stb__div(-v1,v2) + (stb__mod(v1,-v2) ? -1 : 0);
+      } else // it must be possible to compute -(v1+v2) without overflowing
+         return -stb__div(-(v1+v2),v2) + (stb__mod(-(v1+v2),v2) ? -2 : -1);
+   } else
+      return v1/v2;           // same sign, so expect truncation
+   #endif
+}
+
+int stb_div_eucl(int v1, int v2)
+{
+   int q,r;
+   #ifdef C_INTEGER_DIVISION_TRUNCATES
+   q = v1/v2;
+   r = v1%v2;
+   #else
+   // handle every quadrant separately, since we can't rely on q and r flor
+   if (v1 >= 0)
+      if (v2 >= 0)
+         return stb__div(v1,v2);
+      else if (v2 != INT_MIN)
+         q = -stb__div(v1,-v2), r = stb__mod(v1,-v2);
+      else
+         q = 0, r = v1;
+   else if (v1 != INT_MIN)
+      if (v2 >= 0)
+         q = -stb__div(-v1,v2), r = -stb__mod(-v1,v2);
+      else if (v2 != INT_MIN)
+         q = stb__div(-v1,-v2), r = -stb__mod(-v1,-v2);
+      else // if v2 is INT_MIN, then we can't use -v2, but we can't divide by v2
+         q = 1, r = v1-q*v2;
+   else // if v1 is INT_MIN, we have to move away from overflow place
+      if (v2 >= 0)
+         q = -stb__div(-(v1+v2),v2)-1, r = -stb__mod(-(v1+v2),v2);
+      else if (v2 != INT_MIN)
+         q = stb__div(-(v1-v2),-v2)+1, r = -stb__mod(-(v1-v2),-v2);
+      else // for INT_MIN / INT_MIN, we need to be extra-careful to avoid overflow
+         q = 1, r = 0;
+   #endif
+   if (r >= 0)
+      return q;
+   else
+      return q + (v2 > 0 ? -1 : 1);
+}
+
+int stb_mod_trunc(int v1, int v2)
+{
+   #ifdef C_INTEGER_DIVISION_TRUNCATES
+   return v1%v2;
+   #else
+   if (v1 >= 0) { // modulus result should always be positive
+      int r = stb__mod(v1,v2);
+      if (r >= 0)
+         return r;
+      else
+         return r - (v2 < 0 ? v2 : -v2);
+   } else {    // modulus result should always be negative
+      int r = stb__mod(v1,v2);
+      if (r <= 0)
+         return r;
+      else
+         return r + (v2 < 0 ? v2 : -v2);
+   }
+   #endif
+}
+
+int stb_mod_floor(int v1, int v2)
+{
+   #ifdef C_INTEGER_DIVISION_FLOORS
+   return v1%v2;
+   #else
+   if (v2 >= 0) { // result should always be positive
+      int r = stb__mod(v1,v2);
+      if (r >= 0)
+         return r;
+      else
+         return r + v2;
+   } else { // result should always be negative
+      int r = stb__mod(v1,v2);
+      if (r <= 0)
+         return r;
+      else
+         return r + v2;
+   }
+   #endif
+}
+
+int stb_mod_eucl(int v1, int v2)
+{
+   int r = stb__mod(v1,v2);
+
+   if (r >= 0)
+      return r;
+   else
+      return r - (v2 < 0 ? v2 : -v2); // negative abs() [to avoid overflow]
+}
+
+#ifdef STB_DIVIDE_TEST
+#include <stdio.h>
+#include <math.h>
+#include <limits.h>
+
+int show=0;
+int err=0;
+
+void stbdiv_check(int q, int r, int a, int b, char *type, int dir)
+{
+   if ((dir > 0 && r < 0) || (dir < 0 && r > 0)) {
+      fprintf(stderr, "FAILED: %s(%d,%d) remainder %d in wrong direction\n", type,a,b,r);
+      err++;
+   } else
+      if (b != INT_MIN) // can't compute abs(), but if b==INT_MIN all remainders are valid
+         if (r <= -abs(b) || r >= abs(b)) {
+            fprintf(stderr, "FAILED: %s(%d,%d) remainder %d out of range\n", type,a,b,r);
+            err++;
+         }
+   #ifdef STB_DIVIDE_TEST_64
+   {
+      STB_DIVIDE_TEST_64 q64 = q, r64=r, a64=a, b64=b;
+      if (q64*b64+r64 != a64) {
+         fprintf(stderr, "FAILED: %s(%d,%d) remainder %d doesn't match quotient %d\n", type,a,b,r,q);
+         err++;
+      }
+   }
+   #else
+   if (q*b+r != a) {
+      fprintf(stderr, "FAILED: %s(%d,%d) remainder %d doesn't match quotient %d\n", type,a,b,r,q);
+      err++;
+   }
+   #endif
+}
+
+void test(int a, int b)
+{
+   int q,r;
+   if (show) printf("(%+11d,%+d) |  ", a,b);
+   q = stb_div_trunc(a,b), r = stb_mod_trunc(a,b);
+   if (show) printf("(%+11d,%+2d)  ", q,r); stbdiv_check(q,r,a,b, "trunc",a);
+   q = stb_div_floor(a,b), r = stb_mod_floor(a,b);
+   if (show) printf("(%+11d,%+2d)  ", q,r); stbdiv_check(q,r,a,b, "floor",b);
+   q = stb_div_eucl (a,b), r = stb_mod_eucl (a,b);
+   if (show) printf("(%+11d,%+2d)\n", q,r); stbdiv_check(q,r,a,b, "euclidean",1);
+}
+
+void testh(int a, int b)
+{
+   int q,r;
+   if (show) printf("(%08x,%08x) |\n", a,b);
+   q = stb_div_trunc(a,b), r = stb_mod_trunc(a,b); stbdiv_check(q,r,a,b, "trunc",a);
+   if (show) printf("             (%08x,%08x)", q,r);
+   q = stb_div_floor(a,b), r = stb_mod_floor(a,b); stbdiv_check(q,r,a,b, "floor",b);
+   if (show) printf("   (%08x,%08x)", q,r);
+   q = stb_div_eucl (a,b), r = stb_mod_eucl (a,b); stbdiv_check(q,r,a,b, "euclidean",1);
+   if (show) printf("   (%08x,%08x)\n ", q,r);
+}
+
+int main(int argc, char **argv)
+{
+   if (argc > 1) show=1;
+
+   test(8,3);
+   test(8,-3);
+   test(-8,3);
+   test(-8,-3);
+   test(1,2);
+   test(1,-2);
+   test(-1,2);
+   test(-1,-2);
+   test(8,4);
+   test(8,-4);
+   test(-8,4);
+   test(-8,-4);
+
+   test(INT_MAX,1);
+   test(INT_MIN,1);
+   test(INT_MIN+1,1);
+   test(INT_MAX,-1);
+   //test(INT_MIN,-1); // this traps in MSVC, so we leave it untested
+   test(INT_MIN+1,-1);
+   test(INT_MIN,-2);
+   test(INT_MIN+1,2);
+   test(INT_MIN+1,-2);
+   test(INT_MAX,2);
+   test(INT_MAX,-2);
+   test(INT_MIN+1,2);
+   test(INT_MIN+1,-2);
+   test(INT_MIN,2);
+   test(INT_MIN,-2);
+   test(INT_MIN,7);
+   test(INT_MIN,-7);
+   test(INT_MIN+1,4);
+   test(INT_MIN+1,-4);
+
+   testh(-7, INT_MIN);
+   testh(-1, INT_MIN);
+   testh(1, INT_MIN);
+   testh(7, INT_MIN);
+
+   testh(INT_MAX-1, INT_MIN);
+   testh(INT_MAX,   INT_MIN);
+   testh(INT_MIN,   INT_MIN);
+   testh(INT_MIN+1, INT_MIN);
+
+   testh(INT_MAX-1, INT_MAX);
+   testh(INT_MAX  , INT_MAX);
+   testh(INT_MIN  , INT_MAX);
+   testh(INT_MIN+1, INT_MAX);
+
+   return err > 0 ? 1 : 0;
+}
+#endif // STB_DIVIDE_TEST
+#endif // STB_DIVIDE_IMPLEMENTATION
+#endif // INCLUDE_STB_DIVIDE_H
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
--- a/include-demo/stb_ds.h
+++ b/include-demo/stb_ds.h
@@ -1,4 +1,4 @@
-/* stb_ds.h - v0.65 - public domain data structures - Sean Barrett 2019
+/* stb_ds.h - v0.67 - public domain data structures - Sean Barrett 2019
 
    This is a single-header-file library that provides easy-to-use
    dynamic arrays and hash tables for C (also works in C++).
@@ -104,7 +104,7 @@
           moving the rest of the array over. Returns b.
 
       arrinsn:
-        void arrins(T* a, int p, int n);
+        void arrinsn(T* a, int p, int n);
           Inserts n uninitialized items into array a starting at a[p],
           moving the rest of the array over.
 
@@ -123,7 +123,7 @@
           Deletes the element at a[p], moving the rest of the array over.
 
       arrdeln:
-        void arrdel(T* a, int p, int n);
+        void arrdeln(T* a, int p, int n);
           Deletes n elements starting at a[p], moving the rest of the array over.
 
       arrdelswap:
@@ -379,6 +379,9 @@
     Andreas Molzer
     github:hashitaku
     github:srdjanstipic
+    Macoy Madson
+    Andreas Vennstrom
+    Tobias Mansfield-Williams
 */
 
 #ifdef STBDS_UNIT_TESTS
@@ -489,6 +492,7 @@
 //
 
 extern void * stbds_arrgrowf(void *a, size_t elemsize, size_t addlen, size_t min_cap);
+extern void   stbds_arrfreef(void *a);
 extern void   stbds_hmfree_func(void *p, size_t elemsize);
 extern void * stbds_hmget_key(void *a, size_t elemsize, void *key, size_t keysize, int mode);
 extern void * stbds_hmget_key_ts(void *a, size_t elemsize, void *key, size_t keysize, ptrdiff_t *temp, int mode);
@@ -531,24 +535,25 @@
 #define stbds_temp(t)    stbds_header(t)->temp
 #define stbds_temp_key(t) (*(char **) stbds_header(t)->hash_table)
 
-#define stbds_arrsetcap(a,n)  (stbds_arrgrow(a,0,n))
-#define stbds_arrsetlen(a,n)  ((stbds_arrcap(a) < (size_t) (n) ? stbds_arrsetcap((a),(size_t)(n)),0 : 0), (a) ? stbds_header(a)->length = (size_t) (n) : 0)
-#define stbds_arrcap(a)       ((a) ? stbds_header(a)->capacity : 0)
-#define stbds_arrlen(a)       ((a) ? (ptrdiff_t) stbds_header(a)->length : 0)
-#define stbds_arrlenu(a)      ((a) ?             stbds_header(a)->length : 0)
-#define stbds_arrput(a,v)     (stbds_arrmaybegrow(a,1), (a)[stbds_header(a)->length++] = (v))
-#define stbds_arrpush         stbds_arrput  // synonym
-#define stbds_arrpop(a)       (stbds_header(a)->length--, (a)[stbds_header(a)->length])
-#define stbds_arraddn(a,n)    ((void)(stbds_arraddnoff(a, n)))    // deprecated, use one of the following instead:
-#define stbds_arraddnptr(a,n) (stbds_arrmaybegrow(a,n), stbds_header(a)->length += (n), &(a)[stbds_header(a)->length-(n)])
-#define stbds_arraddnoff(a,n) (stbds_arrmaybegrow(a,n), stbds_header(a)->length += (n), stbds_header(a)->length-(n))
-#define stbds_arrlast(a)      ((a)[stbds_header(a)->length-1])
-#define stbds_arrfree(a)      ((void) ((a) ? STBDS_FREE(NULL,stbds_header(a)) : (void)0), (a)=NULL)
-#define stbds_arrdel(a,i)     stbds_arrdeln(a,i,1)
-#define stbds_arrdeln(a,i,n)  (memmove(&(a)[i], &(a)[(i)+(n)], sizeof *(a) * (stbds_header(a)->length-(n)-(i))), stbds_header(a)->length -= (n))
-#define stbds_arrdelswap(a,i) ((a)[i] = stbds_arrlast(a), stbds_header(a)->length -= 1)
-#define stbds_arrinsn(a,i,n)  (stbds_arraddn((a),(n)), memmove(&(a)[(i)+(n)], &(a)[i], sizeof *(a) * (stbds_header(a)->length-(n)-(i))))
-#define stbds_arrins(a,i,v)   (stbds_arrinsn((a),(i),1), (a)[i]=(v))
+#define stbds_arrsetcap(a,n)   (stbds_arrgrow(a,0,n))
+#define stbds_arrsetlen(a,n)   ((stbds_arrcap(a) < (size_t) (n) ? stbds_arrsetcap((a),(size_t)(n)),0 : 0), (a) ? stbds_header(a)->length = (size_t) (n) : 0)
+#define stbds_arrcap(a)        ((a) ? stbds_header(a)->capacity : 0)
+#define stbds_arrlen(a)        ((a) ? (ptrdiff_t) stbds_header(a)->length : 0)
+#define stbds_arrlenu(a)       ((a) ?             stbds_header(a)->length : 0)
+#define stbds_arrput(a,v)      (stbds_arrmaybegrow(a,1), (a)[stbds_header(a)->length++] = (v))
+#define stbds_arrpush          stbds_arrput  // synonym
+#define stbds_arrpop(a)        (stbds_header(a)->length--, (a)[stbds_header(a)->length])
+#define stbds_arraddn(a,n)     ((void)(stbds_arraddnindex(a, n)))    // deprecated, use one of the following instead:
+#define stbds_arraddnptr(a,n)  (stbds_arrmaybegrow(a,n), (n) ? (stbds_header(a)->length += (n), &(a)[stbds_header(a)->length-(n)]) : (a))
+#define stbds_arraddnindex(a,n)(stbds_arrmaybegrow(a,n), (n) ? (stbds_header(a)->length += (n), stbds_header(a)->length-(n)) : stbds_arrlen(a))
+#define stbds_arraddnoff       stbds_arraddnindex
+#define stbds_arrlast(a)       ((a)[stbds_header(a)->length-1])
+#define stbds_arrfree(a)       ((void) ((a) ? STBDS_FREE(NULL,stbds_header(a)) : (void)0), (a)=NULL)
+#define stbds_arrdel(a,i)      stbds_arrdeln(a,i,1)
+#define stbds_arrdeln(a,i,n)   (memmove(&(a)[i], &(a)[(i)+(n)], sizeof *(a) * (stbds_header(a)->length-(n)-(i))), stbds_header(a)->length -= (n))
+#define stbds_arrdelswap(a,i)  ((a)[i] = stbds_arrlast(a), stbds_header(a)->length -= 1)
+#define stbds_arrinsn(a,i,n)   (stbds_arraddn((a),(n)), memmove(&(a)[(i)+(n)], &(a)[i], sizeof *(a) * (stbds_header(a)->length-(n)-(i))))
+#define stbds_arrins(a,i,v)    (stbds_arrinsn((a),(i),1), (a)[i]=(v))
 
 #define stbds_arrmaybegrow(a,n)  ((!(a) || stbds_header(a)->length + (n) > stbds_header(a)->capacity) \
                                   ? (stbds_arrgrow(a,n,0),0) : 0)
@@ -595,7 +600,7 @@
 #define stbds_hmget_ts(t, k, temp)  (stbds_hmgetp_ts(t,k,temp)->value)
 #define stbds_hmlen(t)        ((t) ? (ptrdiff_t) stbds_header((t)-1)->length-1 : 0)
 #define stbds_hmlenu(t)       ((t) ?             stbds_header((t)-1)->length-1 : 0)
-#define stbds_hmgetp_null(t,k)  (stbds_hmgeti(t,k) == -1 ? NULL : &(t)[stbds_temp(t)-1])
+#define stbds_hmgetp_null(t,k)  (stbds_hmgeti(t,k) == -1 ? NULL : &(t)[stbds_temp((t)-1)])
 
 #define stbds_shput(t, k, v) \
     ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_HM_STRING),   \
@@ -646,7 +651,7 @@
 
 #define stbds_shgets(t, k) (*stbds_shgetp(t,k))
 #define stbds_shget(t, k)  (stbds_shgetp(t,k)->value)
-#define stbds_shgetp_null(t,k)  (stbds_shgeti(t,k) == -1 ? NULL : &(t)[stbds_temp(t)-1])
+#define stbds_shgetp_null(t,k)  (stbds_shgeti(t,k) == -1 ? NULL : &(t)[stbds_temp((t)-1)])
 #define stbds_shlen        stbds_hmlen
 
 typedef struct
@@ -756,8 +761,10 @@
 
 void *stbds_arrgrowf(void *a, size_t elemsize, size_t addlen, size_t min_cap)
 {
+  stbds_array_header temp={0}; // force debugging
   void *b;
   size_t min_len = stbds_arrlen(a) + addlen;
+  (void) sizeof(temp);
 
   // compute the minimum capacity needed
   if (min_len > min_cap)
@@ -781,6 +788,7 @@
   if (a == NULL) {
     stbds_header(b)->length = 0;
     stbds_header(b)->hash_table = 0;
+    stbds_header(b)->temp = 0;
   } else {
     STBDS_STATS(++stbds_array_grow);
   }
@@ -789,6 +797,11 @@
   return b;
 }
 
+void stbds_arrfreef(void *a)
+{
+  STBDS_FREE(NULL, stbds_header(a));
+}
+
 //
 // stbds_hm hash table implementation
 //
@@ -1359,7 +1372,6 @@
   {
     size_t hash = mode >= STBDS_HM_STRING ? stbds_hash_string((char*)key,table->seed) : stbds_hash_bytes(key, keysize,table->seed);
     size_t step = STBDS_BUCKET_LENGTH;
-    size_t limit,i;
     size_t pos;
     ptrdiff_t tombstone = -1;
     stbds_hash_bucket *bucket;
@@ -1370,6 +1382,7 @@
     pos = stbds_probe_position(hash, table->slot_count, table->slot_count_log2);
 
     for (;;) {
+      size_t limit, i;
       STBDS_STATS(++stbds_hash_probes);
       bucket = &table->storage[pos >> STBDS_BUCKET_SHIFT];
 
@@ -1378,6 +1391,8 @@
         if (bucket->hash[i] == hash) {
           if (stbds_is_key_equal(raw_a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) {
             stbds_temp(a) = bucket->index[i];
+            if (mode >= STBDS_HM_STRING)
+              stbds_temp_key(a) = * (char **) ((char *) raw_a + elemsize*bucket->index[i] + keyoffset);
             return STBDS_ARR_TO_HASH(a,elemsize);
           }
         } else if (bucket->hash[i] == 0) {
--- /dev/null
+++ b/include-demo/stb_dxt.h
@@ -1,0 +1,719 @@
+// stb_dxt.h - v1.12 - DXT1/DXT5 compressor - public domain
+// original by fabian "ryg" giesen - ported to C by stb
+// use '#define STB_DXT_IMPLEMENTATION' before including to create the implementation
+//
+// USAGE:
+//   call stb_compress_dxt_block() for every block (you must pad)
+//     source should be a 4x4 block of RGBA data in row-major order;
+//     Alpha channel is not stored if you specify alpha=0 (but you
+//     must supply some constant alpha in the alpha channel).
+//     You can turn on dithering and "high quality" using mode.
+//
+// version history:
+//   v1.12  - (ryg) fix bug in single-color table generator
+//   v1.11  - (ryg) avoid racy global init, better single-color tables, remove dither
+//   v1.10  - (i.c) various small quality improvements
+//   v1.09  - (stb) update documentation re: surprising alpha channel requirement
+//   v1.08  - (stb) fix bug in dxt-with-alpha block
+//   v1.07  - (stb) bc4; allow not using libc; add STB_DXT_STATIC
+//   v1.06  - (stb) fix to known-broken 1.05
+//   v1.05  - (stb) support bc5/3dc (Arvids Kokins), use extern "C" in C++ (Pavel Krajcevski)
+//   v1.04  - (ryg) default to no rounding bias for lerped colors (as per S3TC/DX10 spec);
+//            single color match fix (allow for inexact color interpolation);
+//            optimal DXT5 index finder; "high quality" mode that runs multiple refinement steps.
+//   v1.03  - (stb) endianness support
+//   v1.02  - (stb) fix alpha encoding bug
+//   v1.01  - (stb) fix bug converting to RGB that messed up quality, thanks ryg & cbloom
+//   v1.00  - (stb) first release
+//
+// contributors:
+//   Rich Geldreich (more accurate index selection)
+//   Kevin Schmidt (#defines for "freestanding" compilation)
+//   github:ppiastucki (BC4 support)
+//   Ignacio Castano - improve DXT endpoint quantization
+//   Alan Hickman - static table initialization
+//
+// LICENSE
+//
+//   See end of file for license information.
+
+#ifndef STB_INCLUDE_STB_DXT_H
+#define STB_INCLUDE_STB_DXT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef STB_DXT_STATIC
+#define STBDDEF static
+#else
+#define STBDDEF extern
+#endif
+
+// compression mode (bitflags)
+#define STB_DXT_NORMAL    0
+#define STB_DXT_DITHER    1   // use dithering. was always dubious, now deprecated. does nothing!
+#define STB_DXT_HIGHQUAL  2   // high quality mode, does two refinement steps instead of 1. ~30-40% slower.
+
+STBDDEF void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src_rgba_four_bytes_per_pixel, int alpha, int mode);
+STBDDEF void stb_compress_bc4_block(unsigned char *dest, const unsigned char *src_r_one_byte_per_pixel);
+STBDDEF void stb_compress_bc5_block(unsigned char *dest, const unsigned char *src_rg_two_byte_per_pixel);
+
+#define STB_COMPRESS_DXT_BLOCK
+
+#ifdef __cplusplus
+}
+#endif
+#endif // STB_INCLUDE_STB_DXT_H
+
+#ifdef STB_DXT_IMPLEMENTATION
+
+// configuration options for DXT encoder. set them in the project/makefile or just define
+// them at the top.
+
+// STB_DXT_USE_ROUNDING_BIAS
+//     use a rounding bias during color interpolation. this is closer to what "ideal"
+//     interpolation would do but doesn't match the S3TC/DX10 spec. old versions (pre-1.03)
+//     implicitly had this turned on.
+//
+//     in case you're targeting a specific type of hardware (e.g. console programmers):
+//     NVidia and Intel GPUs (as of 2010) as well as DX9 ref use DXT decoders that are closer
+//     to STB_DXT_USE_ROUNDING_BIAS. AMD/ATI, S3 and DX10 ref are closer to rounding with no bias.
+//     you also see "(a*5 + b*3) / 8" on some old GPU designs.
+// #define STB_DXT_USE_ROUNDING_BIAS
+
+#include <stdlib.h>
+
+#if !defined(STBD_FABS)
+#include <math.h>
+#endif
+
+#ifndef STBD_FABS
+#define STBD_FABS(x)          fabs(x)
+#endif
+
+static const unsigned char stb__OMatch5[256][2] = {
+   {  0,  0 }, {  0,  0 }, {  0,  1 }, {  0,  1 }, {  1,  0 }, {  1,  0 }, {  1,  0 }, {  1,  1 },
+   {  1,  1 }, {  1,  1 }, {  1,  2 }, {  0,  4 }, {  2,  1 }, {  2,  1 }, {  2,  1 }, {  2,  2 },
+   {  2,  2 }, {  2,  2 }, {  2,  3 }, {  1,  5 }, {  3,  2 }, {  3,  2 }, {  4,  0 }, {  3,  3 },
+   {  3,  3 }, {  3,  3 }, {  3,  4 }, {  3,  4 }, {  3,  4 }, {  3,  5 }, {  4,  3 }, {  4,  3 },
+   {  5,  2 }, {  4,  4 }, {  4,  4 }, {  4,  5 }, {  4,  5 }, {  5,  4 }, {  5,  4 }, {  5,  4 },
+   {  6,  3 }, {  5,  5 }, {  5,  5 }, {  5,  6 }, {  4,  8 }, {  6,  5 }, {  6,  5 }, {  6,  5 },
+   {  6,  6 }, {  6,  6 }, {  6,  6 }, {  6,  7 }, {  5,  9 }, {  7,  6 }, {  7,  6 }, {  8,  4 },
+   {  7,  7 }, {  7,  7 }, {  7,  7 }, {  7,  8 }, {  7,  8 }, {  7,  8 }, {  7,  9 }, {  8,  7 },
+   {  8,  7 }, {  9,  6 }, {  8,  8 }, {  8,  8 }, {  8,  9 }, {  8,  9 }, {  9,  8 }, {  9,  8 },
+   {  9,  8 }, { 10,  7 }, {  9,  9 }, {  9,  9 }, {  9, 10 }, {  8, 12 }, { 10,  9 }, { 10,  9 },
+   { 10,  9 }, { 10, 10 }, { 10, 10 }, { 10, 10 }, { 10, 11 }, {  9, 13 }, { 11, 10 }, { 11, 10 },
+   { 12,  8 }, { 11, 11 }, { 11, 11 }, { 11, 11 }, { 11, 12 }, { 11, 12 }, { 11, 12 }, { 11, 13 },
+   { 12, 11 }, { 12, 11 }, { 13, 10 }, { 12, 12 }, { 12, 12 }, { 12, 13 }, { 12, 13 }, { 13, 12 },
+   { 13, 12 }, { 13, 12 }, { 14, 11 }, { 13, 13 }, { 13, 13 }, { 13, 14 }, { 12, 16 }, { 14, 13 },
+   { 14, 13 }, { 14, 13 }, { 14, 14 }, { 14, 14 }, { 14, 14 }, { 14, 15 }, { 13, 17 }, { 15, 14 },
+   { 15, 14 }, { 16, 12 }, { 15, 15 }, { 15, 15 }, { 15, 15 }, { 15, 16 }, { 15, 16 }, { 15, 16 },
+   { 15, 17 }, { 16, 15 }, { 16, 15 }, { 17, 14 }, { 16, 16 }, { 16, 16 }, { 16, 17 }, { 16, 17 },
+   { 17, 16 }, { 17, 16 }, { 17, 16 }, { 18, 15 }, { 17, 17 }, { 17, 17 }, { 17, 18 }, { 16, 20 },
+   { 18, 17 }, { 18, 17 }, { 18, 17 }, { 18, 18 }, { 18, 18 }, { 18, 18 }, { 18, 19 }, { 17, 21 },
+   { 19, 18 }, { 19, 18 }, { 20, 16 }, { 19, 19 }, { 19, 19 }, { 19, 19 }, { 19, 20 }, { 19, 20 },
+   { 19, 20 }, { 19, 21 }, { 20, 19 }, { 20, 19 }, { 21, 18 }, { 20, 20 }, { 20, 20 }, { 20, 21 },
+   { 20, 21 }, { 21, 20 }, { 21, 20 }, { 21, 20 }, { 22, 19 }, { 21, 21 }, { 21, 21 }, { 21, 22 },
+   { 20, 24 }, { 22, 21 }, { 22, 21 }, { 22, 21 }, { 22, 22 }, { 22, 22 }, { 22, 22 }, { 22, 23 },
+   { 21, 25 }, { 23, 22 }, { 23, 22 }, { 24, 20 }, { 23, 23 }, { 23, 23 }, { 23, 23 }, { 23, 24 },
+   { 23, 24 }, { 23, 24 }, { 23, 25 }, { 24, 23 }, { 24, 23 }, { 25, 22 }, { 24, 24 }, { 24, 24 },
+   { 24, 25 }, { 24, 25 }, { 25, 24 }, { 25, 24 }, { 25, 24 }, { 26, 23 }, { 25, 25 }, { 25, 25 },
+   { 25, 26 }, { 24, 28 }, { 26, 25 }, { 26, 25 }, { 26, 25 }, { 26, 26 }, { 26, 26 }, { 26, 26 },
+   { 26, 27 }, { 25, 29 }, { 27, 26 }, { 27, 26 }, { 28, 24 }, { 27, 27 }, { 27, 27 }, { 27, 27 },
+   { 27, 28 }, { 27, 28 }, { 27, 28 }, { 27, 29 }, { 28, 27 }, { 28, 27 }, { 29, 26 }, { 28, 28 },
+   { 28, 28 }, { 28, 29 }, { 28, 29 }, { 29, 28 }, { 29, 28 }, { 29, 28 }, { 30, 27 }, { 29, 29 },
+   { 29, 29 }, { 29, 30 }, { 29, 30 }, { 30, 29 }, { 30, 29 }, { 30, 29 }, { 30, 30 }, { 30, 30 },
+   { 30, 30 }, { 30, 31 }, { 30, 31 }, { 31, 30 }, { 31, 30 }, { 31, 30 }, { 31, 31 }, { 31, 31 },
+};
+static const unsigned char stb__OMatch6[256][2] = {
+   {  0,  0 }, {  0,  1 }, {  1,  0 }, {  1,  1 }, {  1,  1 }, {  1,  2 }, {  2,  1 }, {  2,  2 },
+   {  2,  2 }, {  2,  3 }, {  3,  2 }, {  3,  3 }, {  3,  3 }, {  3,  4 }, {  4,  3 }, {  4,  4 },
+   {  4,  4 }, {  4,  5 }, {  5,  4 }, {  5,  5 }, {  5,  5 }, {  5,  6 }, {  6,  5 }, {  6,  6 },
+   {  6,  6 }, {  6,  7 }, {  7,  6 }, {  7,  7 }, {  7,  7 }, {  7,  8 }, {  8,  7 }, {  8,  8 },
+   {  8,  8 }, {  8,  9 }, {  9,  8 }, {  9,  9 }, {  9,  9 }, {  9, 10 }, { 10,  9 }, { 10, 10 },
+   { 10, 10 }, { 10, 11 }, { 11, 10 }, {  8, 16 }, { 11, 11 }, { 11, 12 }, { 12, 11 }, {  9, 17 },
+   { 12, 12 }, { 12, 13 }, { 13, 12 }, { 11, 16 }, { 13, 13 }, { 13, 14 }, { 14, 13 }, { 12, 17 },
+   { 14, 14 }, { 14, 15 }, { 15, 14 }, { 14, 16 }, { 15, 15 }, { 15, 16 }, { 16, 14 }, { 16, 15 },
+   { 17, 14 }, { 16, 16 }, { 16, 17 }, { 17, 16 }, { 18, 15 }, { 17, 17 }, { 17, 18 }, { 18, 17 },
+   { 20, 14 }, { 18, 18 }, { 18, 19 }, { 19, 18 }, { 21, 15 }, { 19, 19 }, { 19, 20 }, { 20, 19 },
+   { 20, 20 }, { 20, 20 }, { 20, 21 }, { 21, 20 }, { 21, 21 }, { 21, 21 }, { 21, 22 }, { 22, 21 },
+   { 22, 22 }, { 22, 22 }, { 22, 23 }, { 23, 22 }, { 23, 23 }, { 23, 23 }, { 23, 24 }, { 24, 23 },
+   { 24, 24 }, { 24, 24 }, { 24, 25 }, { 25, 24 }, { 25, 25 }, { 25, 25 }, { 25, 26 }, { 26, 25 },
+   { 26, 26 }, { 26, 26 }, { 26, 27 }, { 27, 26 }, { 24, 32 }, { 27, 27 }, { 27, 28 }, { 28, 27 },
+   { 25, 33 }, { 28, 28 }, { 28, 29 }, { 29, 28 }, { 27, 32 }, { 29, 29 }, { 29, 30 }, { 30, 29 },
+   { 28, 33 }, { 30, 30 }, { 30, 31 }, { 31, 30 }, { 30, 32 }, { 31, 31 }, { 31, 32 }, { 32, 30 },
+   { 32, 31 }, { 33, 30 }, { 32, 32 }, { 32, 33 }, { 33, 32 }, { 34, 31 }, { 33, 33 }, { 33, 34 },
+   { 34, 33 }, { 36, 30 }, { 34, 34 }, { 34, 35 }, { 35, 34 }, { 37, 31 }, { 35, 35 }, { 35, 36 },
+   { 36, 35 }, { 36, 36 }, { 36, 36 }, { 36, 37 }, { 37, 36 }, { 37, 37 }, { 37, 37 }, { 37, 38 },
+   { 38, 37 }, { 38, 38 }, { 38, 38 }, { 38, 39 }, { 39, 38 }, { 39, 39 }, { 39, 39 }, { 39, 40 },
+   { 40, 39 }, { 40, 40 }, { 40, 40 }, { 40, 41 }, { 41, 40 }, { 41, 41 }, { 41, 41 }, { 41, 42 },
+   { 42, 41 }, { 42, 42 }, { 42, 42 }, { 42, 43 }, { 43, 42 }, { 40, 48 }, { 43, 43 }, { 43, 44 },
+   { 44, 43 }, { 41, 49 }, { 44, 44 }, { 44, 45 }, { 45, 44 }, { 43, 48 }, { 45, 45 }, { 45, 46 },
+   { 46, 45 }, { 44, 49 }, { 46, 46 }, { 46, 47 }, { 47, 46 }, { 46, 48 }, { 47, 47 }, { 47, 48 },
+   { 48, 46 }, { 48, 47 }, { 49, 46 }, { 48, 48 }, { 48, 49 }, { 49, 48 }, { 50, 47 }, { 49, 49 },
+   { 49, 50 }, { 50, 49 }, { 52, 46 }, { 50, 50 }, { 50, 51 }, { 51, 50 }, { 53, 47 }, { 51, 51 },
+   { 51, 52 }, { 52, 51 }, { 52, 52 }, { 52, 52 }, { 52, 53 }, { 53, 52 }, { 53, 53 }, { 53, 53 },
+   { 53, 54 }, { 54, 53 }, { 54, 54 }, { 54, 54 }, { 54, 55 }, { 55, 54 }, { 55, 55 }, { 55, 55 },
+   { 55, 56 }, { 56, 55 }, { 56, 56 }, { 56, 56 }, { 56, 57 }, { 57, 56 }, { 57, 57 }, { 57, 57 },
+   { 57, 58 }, { 58, 57 }, { 58, 58 }, { 58, 58 }, { 58, 59 }, { 59, 58 }, { 59, 59 }, { 59, 59 },
+   { 59, 60 }, { 60, 59 }, { 60, 60 }, { 60, 60 }, { 60, 61 }, { 61, 60 }, { 61, 61 }, { 61, 61 },
+   { 61, 62 }, { 62, 61 }, { 62, 62 }, { 62, 62 }, { 62, 63 }, { 63, 62 }, { 63, 63 }, { 63, 63 },
+};
+
+static int stb__Mul8Bit(int a, int b)
+{
+  int t = a*b + 128;
+  return (t + (t >> 8)) >> 8;
+}
+
+static void stb__From16Bit(unsigned char *out, unsigned short v)
+{
+   int rv = (v & 0xf800) >> 11;
+   int gv = (v & 0x07e0) >>  5;
+   int bv = (v & 0x001f) >>  0;
+
+   // expand to 8 bits via bit replication
+   out[0] = (rv * 33) >> 2;
+   out[1] = (gv * 65) >> 4;
+   out[2] = (bv * 33) >> 2;
+   out[3] = 0;
+}
+
+static unsigned short stb__As16Bit(int r, int g, int b)
+{
+   return (unsigned short)((stb__Mul8Bit(r,31) << 11) + (stb__Mul8Bit(g,63) << 5) + stb__Mul8Bit(b,31));
+}
+
+// linear interpolation at 1/3 point between a and b, using desired rounding type
+static int stb__Lerp13(int a, int b)
+{
+#ifdef STB_DXT_USE_ROUNDING_BIAS
+   // with rounding bias
+   return a + stb__Mul8Bit(b-a, 0x55);
+#else
+   // without rounding bias
+   // replace "/ 3" by "* 0xaaab) >> 17" if your compiler sucks or you really need every ounce of speed.
+   return (2*a + b) / 3;
+#endif
+}
+
+// lerp RGB color
+static void stb__Lerp13RGB(unsigned char *out, unsigned char *p1, unsigned char *p2)
+{
+   out[0] = (unsigned char)stb__Lerp13(p1[0], p2[0]);
+   out[1] = (unsigned char)stb__Lerp13(p1[1], p2[1]);
+   out[2] = (unsigned char)stb__Lerp13(p1[2], p2[2]);
+}
+
+/****************************************************************************/
+
+static void stb__EvalColors(unsigned char *color,unsigned short c0,unsigned short c1)
+{
+   stb__From16Bit(color+ 0, c0);
+   stb__From16Bit(color+ 4, c1);
+   stb__Lerp13RGB(color+ 8, color+0, color+4);
+   stb__Lerp13RGB(color+12, color+4, color+0);
+}
+
+// The color matching function
+static unsigned int stb__MatchColorsBlock(unsigned char *block, unsigned char *color)
+{
+   unsigned int mask = 0;
+   int dirr = color[0*4+0] - color[1*4+0];
+   int dirg = color[0*4+1] - color[1*4+1];
+   int dirb = color[0*4+2] - color[1*4+2];
+   int dots[16];
+   int stops[4];
+   int i;
+   int c0Point, halfPoint, c3Point;
+
+   for(i=0;i<16;i++)
+      dots[i] = block[i*4+0]*dirr + block[i*4+1]*dirg + block[i*4+2]*dirb;
+
+   for(i=0;i<4;i++)
+      stops[i] = color[i*4+0]*dirr + color[i*4+1]*dirg + color[i*4+2]*dirb;
+
+   // think of the colors as arranged on a line; project point onto that line, then choose
+   // next color out of available ones. we compute the crossover points for "best color in top
+   // half"/"best in bottom half" and then the same inside that subinterval.
+   //
+   // relying on this 1d approximation isn't always optimal in terms of euclidean distance,
+   // but it's very close and a lot faster.
+   // http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html
+
+   c0Point   = (stops[1] + stops[3]);
+   halfPoint = (stops[3] + stops[2]);
+   c3Point   = (stops[2] + stops[0]);
+
+   for (i=15;i>=0;i--) {
+      int dot = dots[i]*2;
+      mask <<= 2;
+
+      if(dot < halfPoint)
+         mask |= (dot < c0Point) ? 1 : 3;
+      else
+         mask |= (dot < c3Point) ? 2 : 0;
+   }
+
+   return mask;
+}
+
+// The color optimization function. (Clever code, part 1)
+static void stb__OptimizeColorsBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16)
+{
+  int mind,maxd;
+  unsigned char *minp, *maxp;
+  double magn;
+  int v_r,v_g,v_b;
+  static const int nIterPower = 4;
+  float covf[6],vfr,vfg,vfb;
+
+  // determine color distribution
+  int cov[6];
+  int mu[3],min[3],max[3];
+  int ch,i,iter;
+
+  for(ch=0;ch<3;ch++)
+  {
+    const unsigned char *bp = ((const unsigned char *) block) + ch;
+    int muv,minv,maxv;
+
+    muv = minv = maxv = bp[0];
+    for(i=4;i<64;i+=4)
+    {
+      muv += bp[i];
+      if (bp[i] < minv) minv = bp[i];
+      else if (bp[i] > maxv) maxv = bp[i];
+    }
+
+    mu[ch] = (muv + 8) >> 4;
+    min[ch] = minv;
+    max[ch] = maxv;
+  }
+
+  // determine covariance matrix
+  for (i=0;i<6;i++)
+     cov[i] = 0;
+
+  for (i=0;i<16;i++)
+  {
+    int r = block[i*4+0] - mu[0];
+    int g = block[i*4+1] - mu[1];
+    int b = block[i*4+2] - mu[2];
+
+    cov[0] += r*r;
+    cov[1] += r*g;
+    cov[2] += r*b;
+    cov[3] += g*g;
+    cov[4] += g*b;
+    cov[5] += b*b;
+  }
+
+  // convert covariance matrix to float, find principal axis via power iter
+  for(i=0;i<6;i++)
+    covf[i] = cov[i] / 255.0f;
+
+  vfr = (float) (max[0] - min[0]);
+  vfg = (float) (max[1] - min[1]);
+  vfb = (float) (max[2] - min[2]);
+
+  for(iter=0;iter<nIterPower;iter++)
+  {
+    float r = vfr*covf[0] + vfg*covf[1] + vfb*covf[2];
+    float g = vfr*covf[1] + vfg*covf[3] + vfb*covf[4];
+    float b = vfr*covf[2] + vfg*covf[4] + vfb*covf[5];
+
+    vfr = r;
+    vfg = g;
+    vfb = b;
+  }
+
+  magn = STBD_FABS(vfr);
+  if (STBD_FABS(vfg) > magn) magn = STBD_FABS(vfg);
+  if (STBD_FABS(vfb) > magn) magn = STBD_FABS(vfb);
+
+   if(magn < 4.0f) { // too small, default to luminance
+      v_r = 299; // JPEG YCbCr luma coefs, scaled by 1000.
+      v_g = 587;
+      v_b = 114;
+   } else {
+      magn = 512.0 / magn;
+      v_r = (int) (vfr * magn);
+      v_g = (int) (vfg * magn);
+      v_b = (int) (vfb * magn);
+   }
+
+   minp = maxp = block;
+   mind = maxd = block[0]*v_r + block[1]*v_g + block[2]*v_b;
+   // Pick colors at extreme points
+   for(i=1;i<16;i++)
+   {
+      int dot = block[i*4+0]*v_r + block[i*4+1]*v_g + block[i*4+2]*v_b;
+
+      if (dot < mind) {
+         mind = dot;
+         minp = block+i*4;
+      }
+
+      if (dot > maxd) {
+         maxd = dot;
+         maxp = block+i*4;
+      }
+   }
+
+   *pmax16 = stb__As16Bit(maxp[0],maxp[1],maxp[2]);
+   *pmin16 = stb__As16Bit(minp[0],minp[1],minp[2]);
+}
+
+static const float stb__midpoints5[32] = {
+   0.015686f, 0.047059f, 0.078431f, 0.111765f, 0.145098f, 0.176471f, 0.207843f, 0.241176f, 0.274510f, 0.305882f, 0.337255f, 0.370588f, 0.403922f, 0.435294f, 0.466667f, 0.5f,
+   0.533333f, 0.564706f, 0.596078f, 0.629412f, 0.662745f, 0.694118f, 0.725490f, 0.758824f, 0.792157f, 0.823529f, 0.854902f, 0.888235f, 0.921569f, 0.952941f, 0.984314f, 1.0f
+};
+
+static const float stb__midpoints6[64] = {
+   0.007843f, 0.023529f, 0.039216f, 0.054902f, 0.070588f, 0.086275f, 0.101961f, 0.117647f, 0.133333f, 0.149020f, 0.164706f, 0.180392f, 0.196078f, 0.211765f, 0.227451f, 0.245098f,
+   0.262745f, 0.278431f, 0.294118f, 0.309804f, 0.325490f, 0.341176f, 0.356863f, 0.372549f, 0.388235f, 0.403922f, 0.419608f, 0.435294f, 0.450980f, 0.466667f, 0.482353f, 0.500000f,
+   0.517647f, 0.533333f, 0.549020f, 0.564706f, 0.580392f, 0.596078f, 0.611765f, 0.627451f, 0.643137f, 0.658824f, 0.674510f, 0.690196f, 0.705882f, 0.721569f, 0.737255f, 0.754902f,
+   0.772549f, 0.788235f, 0.803922f, 0.819608f, 0.835294f, 0.850980f, 0.866667f, 0.882353f, 0.898039f, 0.913725f, 0.929412f, 0.945098f, 0.960784f, 0.976471f, 0.992157f, 1.0f
+};
+
+static unsigned short stb__Quantize5(float x)
+{
+   unsigned short q;
+   x = x < 0 ? 0 : x > 1 ? 1 : x;  // saturate
+   q = (unsigned short)(x * 31);
+   q += (x > stb__midpoints5[q]);
+   return q;
+}
+
+static unsigned short stb__Quantize6(float x)
+{
+   unsigned short q;
+   x = x < 0 ? 0 : x > 1 ? 1 : x;  // saturate
+   q = (unsigned short)(x * 63);
+   q += (x > stb__midpoints6[q]);
+   return q;
+}
+
+// The refinement function. (Clever code, part 2)
+// Tries to optimize colors to suit block contents better.
+// (By solving a least squares system via normal equations+Cramer's rule)
+static int stb__RefineBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16, unsigned int mask)
+{
+   static const int w1Tab[4] = { 3,0,2,1 };
+   static const int prods[4] = { 0x090000,0x000900,0x040102,0x010402 };
+   // ^some magic to save a lot of multiplies in the accumulating loop...
+   // (precomputed products of weights for least squares system, accumulated inside one 32-bit register)
+
+   float f;
+   unsigned short oldMin, oldMax, min16, max16;
+   int i, akku = 0, xx,xy,yy;
+   int At1_r,At1_g,At1_b;
+   int At2_r,At2_g,At2_b;
+   unsigned int cm = mask;
+
+   oldMin = *pmin16;
+   oldMax = *pmax16;
+
+   if((mask ^ (mask<<2)) < 4) // all pixels have the same index?
+   {
+      // yes, linear system would be singular; solve using optimal
+      // single-color match on average color
+      int r = 8, g = 8, b = 8;
+      for (i=0;i<16;++i) {
+         r += block[i*4+0];
+         g += block[i*4+1];
+         b += block[i*4+2];
+      }
+
+      r >>= 4; g >>= 4; b >>= 4;
+
+      max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0];
+      min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1];
+   } else {
+      At1_r = At1_g = At1_b = 0;
+      At2_r = At2_g = At2_b = 0;
+      for (i=0;i<16;++i,cm>>=2) {
+         int step = cm&3;
+         int w1 = w1Tab[step];
+         int r = block[i*4+0];
+         int g = block[i*4+1];
+         int b = block[i*4+2];
+
+         akku    += prods[step];
+         At1_r   += w1*r;
+         At1_g   += w1*g;
+         At1_b   += w1*b;
+         At2_r   += r;
+         At2_g   += g;
+         At2_b   += b;
+      }
+
+      At2_r = 3*At2_r - At1_r;
+      At2_g = 3*At2_g - At1_g;
+      At2_b = 3*At2_b - At1_b;
+
+      // extract solutions and decide solvability
+      xx = akku >> 16;
+      yy = (akku >> 8) & 0xff;
+      xy = (akku >> 0) & 0xff;
+
+      f = 3.0f / 255.0f / (xx*yy - xy*xy);
+
+      max16 =  stb__Quantize5((At1_r*yy - At2_r * xy) * f) << 11;
+      max16 |= stb__Quantize6((At1_g*yy - At2_g * xy) * f) << 5;
+      max16 |= stb__Quantize5((At1_b*yy - At2_b * xy) * f) << 0;
+
+      min16 =  stb__Quantize5((At2_r*xx - At1_r * xy) * f) << 11;
+      min16 |= stb__Quantize6((At2_g*xx - At1_g * xy) * f) << 5;
+      min16 |= stb__Quantize5((At2_b*xx - At1_b * xy) * f) << 0;
+   }
+
+   *pmin16 = min16;
+   *pmax16 = max16;
+   return oldMin != min16 || oldMax != max16;
+}
+
+// Color block compression
+static void stb__CompressColorBlock(unsigned char *dest, unsigned char *block, int mode)
+{
+   unsigned int mask;
+   int i;
+   int refinecount;
+   unsigned short max16, min16;
+   unsigned char color[4*4];
+
+   refinecount = (mode & STB_DXT_HIGHQUAL) ? 2 : 1;
+
+   // check if block is constant
+   for (i=1;i<16;i++)
+      if (((unsigned int *) block)[i] != ((unsigned int *) block)[0])
+         break;
+
+   if(i == 16) { // constant color
+      int r = block[0], g = block[1], b = block[2];
+      mask  = 0xaaaaaaaa;
+      max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0];
+      min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1];
+   } else {
+      // first step: PCA+map along principal axis
+      stb__OptimizeColorsBlock(block,&max16,&min16);
+      if (max16 != min16) {
+         stb__EvalColors(color,max16,min16);
+         mask = stb__MatchColorsBlock(block,color);
+      } else
+         mask = 0;
+
+      // third step: refine (multiple times if requested)
+      for (i=0;i<refinecount;i++) {
+         unsigned int lastmask = mask;
+
+         if (stb__RefineBlock(block,&max16,&min16,mask)) {
+            if (max16 != min16) {
+               stb__EvalColors(color,max16,min16);
+               mask = stb__MatchColorsBlock(block,color);
+            } else {
+               mask = 0;
+               break;
+            }
+         }
+
+         if(mask == lastmask)
+            break;
+      }
+  }
+
+  // write the color block
+  if(max16 < min16)
+  {
+     unsigned short t = min16;
+     min16 = max16;
+     max16 = t;
+     mask ^= 0x55555555;
+  }
+
+  dest[0] = (unsigned char) (max16);
+  dest[1] = (unsigned char) (max16 >> 8);
+  dest[2] = (unsigned char) (min16);
+  dest[3] = (unsigned char) (min16 >> 8);
+  dest[4] = (unsigned char) (mask);
+  dest[5] = (unsigned char) (mask >> 8);
+  dest[6] = (unsigned char) (mask >> 16);
+  dest[7] = (unsigned char) (mask >> 24);
+}
+
+// Alpha block compression (this is easy for a change)
+static void stb__CompressAlphaBlock(unsigned char *dest,unsigned char *src, int stride)
+{
+   int i,dist,bias,dist4,dist2,bits,mask;
+
+   // find min/max color
+   int mn,mx;
+   mn = mx = src[0];
+
+   for (i=1;i<16;i++)
+   {
+      if (src[i*stride] < mn) mn = src[i*stride];
+      else if (src[i*stride] > mx) mx = src[i*stride];
+   }
+
+   // encode them
+   dest[0] = (unsigned char)mx;
+   dest[1] = (unsigned char)mn;
+   dest += 2;
+
+   // determine bias and emit color indices
+   // given the choice of mx/mn, these indices are optimal:
+   // http://fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination/
+   dist = mx-mn;
+   dist4 = dist*4;
+   dist2 = dist*2;
+   bias = (dist < 8) ? (dist - 1) : (dist/2 + 2);
+   bias -= mn * 7;
+   bits = 0,mask=0;
+
+   for (i=0;i<16;i++) {
+      int a = src[i*stride]*7 + bias;
+      int ind,t;
+
+      // select index. this is a "linear scale" lerp factor between 0 (val=min) and 7 (val=max).
+      t = (a >= dist4) ? -1 : 0; ind =  t & 4; a -= dist4 & t;
+      t = (a >= dist2) ? -1 : 0; ind += t & 2; a -= dist2 & t;
+      ind += (a >= dist);
+
+      // turn linear scale into DXT index (0/1 are extremal pts)
+      ind = -ind & 7;
+      ind ^= (2 > ind);
+
+      // write index
+      mask |= ind << bits;
+      if((bits += 3) >= 8) {
+         *dest++ = (unsigned char)mask;
+         mask >>= 8;
+         bits -= 8;
+      }
+   }
+}
+
+void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src, int alpha, int mode)
+{
+   unsigned char data[16][4];
+   if (alpha) {
+      int i;
+      stb__CompressAlphaBlock(dest,(unsigned char*) src+3, 4);
+      dest += 8;
+      // make a new copy of the data in which alpha is opaque,
+      // because code uses a fast test for color constancy
+      memcpy(data, src, 4*16);
+      for (i=0; i < 16; ++i)
+         data[i][3] = 255;
+      src = &data[0][0];
+   }
+
+   stb__CompressColorBlock(dest,(unsigned char*) src,mode);
+}
+
+void stb_compress_bc4_block(unsigned char *dest, const unsigned char *src)
+{
+   stb__CompressAlphaBlock(dest,(unsigned char*) src, 1);
+}
+
+void stb_compress_bc5_block(unsigned char *dest, const unsigned char *src)
+{
+   stb__CompressAlphaBlock(dest,(unsigned char*) src,2);
+   stb__CompressAlphaBlock(dest + 8,(unsigned char*) src+1,2);
+}
+#endif // STB_DXT_IMPLEMENTATION
+
+// Compile with STB_DXT_IMPLEMENTATION and STB_DXT_GENERATE_TABLES
+// defined to generate the tables above.
+#ifdef STB_DXT_GENERATE_TABLES
+#include <stdio.h>
+
+int main()
+{
+   int i, j;
+   const char *omatch_names[] = { "stb__OMatch5", "stb__OMatch6" };
+   int dequant_mults[2] = { 33*4, 65 }; // .4 fixed-point dequant multipliers
+
+   // optimal endpoint tables
+   for (i = 0; i < 2; ++i) {
+      int dequant = dequant_mults[i];
+      int size = i ? 64 : 32;
+      printf("static const unsigned char %s[256][2] = {\n", omatch_names[i]);
+      for (int j = 0; j < 256; ++j) {
+         int mn, mx;
+         int best_mn = 0, best_mx = 0;
+         int best_err = 256 * 100;
+         for (mn=0;mn<size;mn++) {
+            for (mx=0;mx<size;mx++) {
+               int mine = (mn * dequant) >> 4;
+               int maxe = (mx * dequant) >> 4;
+               int err = abs(stb__Lerp13(maxe, mine) - j) * 100;
+
+               // DX10 spec says that interpolation must be within 3% of "correct" result,
+               // add this as error term. Normally we'd expect a random distribution of
+               // +-1.5% error, but nowhere in the spec does it say that the error has to be
+               // unbiased - better safe than sorry.
+               err += abs(maxe - mine) * 3;
+
+               if(err < best_err) {
+                  best_mn = mn;
+                  best_mx = mx;
+                  best_err = err;
+               }
+            }
+         }
+         if ((j % 8) == 0) printf("  "); // 2 spaces, third is done below
+         printf(" { %2d, %2d },", best_mx, best_mn);
+         if ((j % 8) == 7) printf("\n");
+      }
+      printf("};\n");
+   }
+
+   return 0;
+}
+#endif
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
--- /dev/null
+++ b/include-demo/stb_easy_font.h
@@ -1,0 +1,305 @@
+// stb_easy_font.h - v1.1 - bitmap font for 3D rendering - public domain
+// Sean Barrett, Feb 2015
+//
+//    Easy-to-deploy,
+//    reasonably compact,
+//    extremely inefficient performance-wise,
+//    crappy-looking,
+//    ASCII-only,
+//    bitmap font for use in 3D APIs.
+//
+// Intended for when you just want to get some text displaying
+// in a 3D app as quickly as possible.
+//
+// Doesn't use any textures, instead builds characters out of quads.
+//
+// DOCUMENTATION:
+//
+//   int stb_easy_font_width(char *text)
+//   int stb_easy_font_height(char *text)
+//
+//      Takes a string and returns the horizontal size and the
+//      vertical size (which can vary if 'text' has newlines).
+//
+//   int stb_easy_font_print(float x, float y,
+//                           char *text, unsigned char color[4],
+//                           void *vertex_buffer, int vbuf_size)
+//
+//      Takes a string (which can contain '\n') and fills out a
+//      vertex buffer with renderable data to draw the string.
+//      Output data assumes increasing x is rightwards, increasing y
+//      is downwards.
+//
+//      The vertex data is divided into quads, i.e. there are four
+//      vertices in the vertex buffer for each quad.
+//
+//      The vertices are stored in an interleaved format:
+//
+//         x:float
+//         y:float
+//         z:float
+//         color:uint8[4]
+//
+//      You can ignore z and color if you get them from elsewhere
+//      This format was chosen in the hopes it would make it
+//      easier for you to reuse existing vertex-buffer-drawing code.
+//
+//      If you pass in NULL for color, it becomes 255,255,255,255.
+//
+//      Returns the number of quads.
+//
+//      If the buffer isn't large enough, it will truncate.
+//      Expect it to use an average of ~270 bytes per character.
+//
+//      If your API doesn't draw quads, build a reusable index
+//      list that allows you to render quads as indexed triangles.
+//
+//   void stb_easy_font_spacing(float spacing)
+//
+//      Use positive values to expand the space between characters,
+//      and small negative values (no smaller than -1.5) to contract
+//      the space between characters.
+//
+//      E.g. spacing = 1 adds one "pixel" of spacing between the
+//      characters. spacing = -1 is reasonable but feels a bit too
+//      compact to me; -0.5 is a reasonable compromise as long as
+//      you're scaling the font up.
+//
+// LICENSE
+//
+//   See end of file for license information.
+//
+// VERSION HISTORY
+//
+//   (2020-02-02)  1.1   make everything static so can compile it in more than one src file
+//   (2017-01-15)  1.0   space character takes same space as numbers; fix bad spacing of 'f'
+//   (2016-01-22)  0.7   width() supports multiline text; add height()
+//   (2015-09-13)  0.6   #include <math.h>; updated license
+//   (2015-02-01)  0.5   First release
+//
+// CONTRIBUTORS
+//
+//   github:vassvik    --  bug report
+//   github:podsvirov  --  fix multiple definition errors
+
+#if 0
+// SAMPLE CODE:
+//
+//    Here's sample code for old OpenGL; it's a lot more complicated
+//    to make work on modern APIs, and that's your problem.
+//
+void print_string(float x, float y, char *text, float r, float g, float b)
+{
+  static char buffer[99999]; // ~500 chars
+  int num_quads;
+
+  num_quads = stb_easy_font_print(x, y, text, NULL, buffer, sizeof(buffer));
+
+  glColor3f(r,g,b);
+  glEnableClientState(GL_VERTEX_ARRAY);
+  glVertexPointer(2, GL_FLOAT, 16, buffer);
+  glDrawArrays(GL_QUADS, 0, num_quads*4);
+  glDisableClientState(GL_VERTEX_ARRAY);
+}
+#endif
+
+#ifndef INCLUDE_STB_EASY_FONT_H
+#define INCLUDE_STB_EASY_FONT_H
+
+#include <stdlib.h>
+#include <math.h>
+
+static struct stb_easy_font_info_struct {
+    unsigned char advance;
+    unsigned char h_seg;
+    unsigned char v_seg;
+} stb_easy_font_charinfo[96] = {
+    {  6,  0,  0 },  {  3,  0,  0 },  {  5,  1,  1 },  {  7,  1,  4 },
+    {  7,  3,  7 },  {  7,  6, 12 },  {  7,  8, 19 },  {  4, 16, 21 },
+    {  4, 17, 22 },  {  4, 19, 23 },  { 23, 21, 24 },  { 23, 22, 31 },
+    { 20, 23, 34 },  { 22, 23, 36 },  { 19, 24, 36 },  { 21, 25, 36 },
+    {  6, 25, 39 },  {  6, 27, 43 },  {  6, 28, 45 },  {  6, 30, 49 },
+    {  6, 33, 53 },  {  6, 34, 57 },  {  6, 40, 58 },  {  6, 46, 59 },
+    {  6, 47, 62 },  {  6, 55, 64 },  { 19, 57, 68 },  { 20, 59, 68 },
+    { 21, 61, 69 },  { 22, 66, 69 },  { 21, 68, 69 },  {  7, 73, 69 },
+    {  9, 75, 74 },  {  6, 78, 81 },  {  6, 80, 85 },  {  6, 83, 90 },
+    {  6, 85, 91 },  {  6, 87, 95 },  {  6, 90, 96 },  {  7, 92, 97 },
+    {  6, 96,102 },  {  5, 97,106 },  {  6, 99,107 },  {  6,100,110 },
+    {  6,100,115 },  {  7,101,116 },  {  6,101,121 },  {  6,101,125 },
+    {  6,102,129 },  {  7,103,133 },  {  6,104,140 },  {  6,105,145 },
+    {  7,107,149 },  {  6,108,151 },  {  7,109,155 },  {  7,109,160 },
+    {  7,109,165 },  {  7,118,167 },  {  6,118,172 },  {  4,120,176 },
+    {  6,122,177 },  {  4,122,181 },  { 23,124,182 },  { 22,129,182 },
+    {  4,130,182 },  { 22,131,183 },  {  6,133,187 },  { 22,135,191 },
+    {  6,137,192 },  { 22,139,196 },  {  6,144,197 },  { 22,147,198 },
+    {  6,150,202 },  { 19,151,206 },  { 21,152,207 },  {  6,155,209 },
+    {  3,160,210 },  { 23,160,211 },  { 22,164,216 },  { 22,165,220 },
+    { 22,167,224 },  { 22,169,228 },  { 21,171,232 },  { 21,173,233 },
+    {  5,178,233 },  { 22,179,234 },  { 23,180,238 },  { 23,180,243 },
+    { 23,180,248 },  { 22,189,248 },  { 22,191,252 },  {  5,196,252 },
+    {  3,203,252 },  {  5,203,253 },  { 22,210,253 },  {  0,214,253 },
+};
+
+static unsigned char stb_easy_font_hseg[214] = {
+   97,37,69,84,28,51,2,18,10,49,98,41,65,25,81,105,33,9,97,1,97,37,37,36,
+    81,10,98,107,3,100,3,99,58,51,4,99,58,8,73,81,10,50,98,8,73,81,4,10,50,
+    98,8,25,33,65,81,10,50,17,65,97,25,33,25,49,9,65,20,68,1,65,25,49,41,
+    11,105,13,101,76,10,50,10,50,98,11,99,10,98,11,50,99,11,50,11,99,8,57,
+    58,3,99,99,107,10,10,11,10,99,11,5,100,41,65,57,41,65,9,17,81,97,3,107,
+    9,97,1,97,33,25,9,25,41,100,41,26,82,42,98,27,83,42,98,26,51,82,8,41,
+    35,8,10,26,82,114,42,1,114,8,9,73,57,81,41,97,18,8,8,25,26,26,82,26,82,
+    26,82,41,25,33,82,26,49,73,35,90,17,81,41,65,57,41,65,25,81,90,114,20,
+    84,73,57,41,49,25,33,65,81,9,97,1,97,25,33,65,81,57,33,25,41,25,
+};
+
+static unsigned char stb_easy_font_vseg[253] = {
+   4,2,8,10,15,8,15,33,8,15,8,73,82,73,57,41,82,10,82,18,66,10,21,29,1,65,
+    27,8,27,9,65,8,10,50,97,74,66,42,10,21,57,41,29,25,14,81,73,57,26,8,8,
+    26,66,3,8,8,15,19,21,90,58,26,18,66,18,105,89,28,74,17,8,73,57,26,21,
+    8,42,41,42,8,28,22,8,8,30,7,8,8,26,66,21,7,8,8,29,7,7,21,8,8,8,59,7,8,
+    8,15,29,8,8,14,7,57,43,10,82,7,7,25,42,25,15,7,25,41,15,21,105,105,29,
+    7,57,57,26,21,105,73,97,89,28,97,7,57,58,26,82,18,57,57,74,8,30,6,8,8,
+    14,3,58,90,58,11,7,74,43,74,15,2,82,2,42,75,42,10,67,57,41,10,7,2,42,
+    74,106,15,2,35,8,8,29,7,8,8,59,35,51,8,8,15,35,30,35,8,8,30,7,8,8,60,
+    36,8,45,7,7,36,8,43,8,44,21,8,8,44,35,8,8,43,23,8,8,43,35,8,8,31,21,15,
+    20,8,8,28,18,58,89,58,26,21,89,73,89,29,20,8,8,30,7,
+};
+
+typedef struct
+{
+   unsigned char c[4];
+} stb_easy_font_color;
+
+static int stb_easy_font_draw_segs(float x, float y, unsigned char *segs, int num_segs, int vertical, stb_easy_font_color c, char *vbuf, int vbuf_size, int offset)
+{
+    int i,j;
+    for (i=0; i < num_segs; ++i) {
+        int len = segs[i] & 7;
+        x += (float) ((segs[i] >> 3) & 1);
+        if (len && offset+64 <= vbuf_size) {
+            float y0 = y + (float) (segs[i]>>4);
+            for (j=0; j < 4; ++j) {
+                * (float *) (vbuf+offset+0) = x  + (j==1 || j==2 ? (vertical ? 1 : len) : 0);
+                * (float *) (vbuf+offset+4) = y0 + (    j >= 2   ? (vertical ? len : 1) : 0);
+                * (float *) (vbuf+offset+8) = 0.f;
+                * (stb_easy_font_color *) (vbuf+offset+12) = c;
+                offset += 16;
+            }
+        }
+    }
+    return offset;
+}
+
+static float stb_easy_font_spacing_val = 0;
+static void stb_easy_font_spacing(float spacing)
+{
+   stb_easy_font_spacing_val = spacing;
+}
+
+static int stb_easy_font_print(float x, float y, char *text, unsigned char color[4], void *vertex_buffer, int vbuf_size)
+{
+    char *vbuf = (char *) vertex_buffer;
+    float start_x = x;
+    int offset = 0;
+
+    stb_easy_font_color c = { 255,255,255,255 }; // use structure copying to avoid needing depending on memcpy()
+    if (color) { c.c[0] = color[0]; c.c[1] = color[1]; c.c[2] = color[2]; c.c[3] = color[3]; }
+
+    while (*text && offset < vbuf_size) {
+        if (*text == '\n') {
+            y += 12;
+            x = start_x;
+        } else {
+            unsigned char advance = stb_easy_font_charinfo[*text-32].advance;
+            float y_ch = advance & 16 ? y+1 : y;
+            int h_seg, v_seg, num_h, num_v;
+            h_seg = stb_easy_font_charinfo[*text-32  ].h_seg;
+            v_seg = stb_easy_font_charinfo[*text-32  ].v_seg;
+            num_h = stb_easy_font_charinfo[*text-32+1].h_seg - h_seg;
+            num_v = stb_easy_font_charinfo[*text-32+1].v_seg - v_seg;
+            offset = stb_easy_font_draw_segs(x, y_ch, &stb_easy_font_hseg[h_seg], num_h, 0, c, vbuf, vbuf_size, offset);
+            offset = stb_easy_font_draw_segs(x, y_ch, &stb_easy_font_vseg[v_seg], num_v, 1, c, vbuf, vbuf_size, offset);
+            x += advance & 15;
+            x += stb_easy_font_spacing_val;
+        }
+        ++text;
+    }
+    return (unsigned) offset/64;
+}
+
+static int stb_easy_font_width(char *text)
+{
+    float len = 0;
+    float max_len = 0;
+    while (*text) {
+        if (*text == '\n') {
+            if (len > max_len) max_len = len;
+            len = 0;
+        } else {
+            len += stb_easy_font_charinfo[*text-32].advance & 15;
+            len += stb_easy_font_spacing_val;
+        }
+        ++text;
+    }
+    if (len > max_len) max_len = len;
+    return (int) ceil(max_len);
+}
+
+static int stb_easy_font_height(char *text)
+{
+    float y = 0;
+    int nonempty_line=0;
+    while (*text) {
+        if (*text == '\n') {
+            y += 12;
+            nonempty_line = 0;
+        } else {
+            nonempty_line = 1;
+        }
+        ++text;
+    }
+    return (int) ceil(y + (nonempty_line ? 12 : 0));
+}
+#endif
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
--- /dev/null
+++ b/include-demo/stb_herringbone_wang_tile.h
@@ -1,0 +1,1221 @@
+/* stbhw - v0.7 -  http://nothings.org/gamedev/herringbone
+   Herringbone Wang Tile Generator - Sean Barrett 2014 - public domain
+
+== LICENSE ==============================
+
+This software is dual-licensed to the public domain and under the following
+license: you are granted a perpetual, irrevocable license to copy, modify,
+publish, and distribute this file as you see fit.
+
+== WHAT IT IS ===========================
+
+ This library is an SDK for Herringbone Wang Tile generation:
+
+      http://nothings.org/gamedev/herringbone
+
+ The core design is that you use this library offline to generate a
+ "template" of the tiles you'll create. You then edit those tiles, then
+ load the created tile image file back into this library and use it at
+ runtime to generate "maps".
+
+ You cannot load arbitrary tile image files with this library; it is
+ only designed to load image files made from the template it created.
+ It stores a binary description of the tile sizes & constraints in a
+ few pixels, and uses those to recover the rules, rather than trying
+ to parse the tiles themselves.
+
+ You *can* use this library to generate from arbitrary tile sets, but
+ only by loading the tile set and specifying the constraints explicitly
+ yourself.
+
+== COMPILING ============================
+
+ 1. #define STB_HERRINGBONE_WANG_TILE_IMPLEMENTATION before including this
+    header file in *one* source file to create the implementation
+    in that source file.
+
+ 2. optionally #define STB_HBWANG_RAND() to be a random number
+    generator. if you don't define it, it will use rand(),
+    and you need to seed srand() yourself.
+
+ 3. optionally #define STB_HBWANG_ASSERT(x), otherwise
+    it will use assert()
+
+ 4. optionally #define STB_HBWANG_STATIC to force all symbols to be
+    static instead of public, so they are only accesible
+    in the source file that creates the implementation
+
+ 5. optionally #define STB_HBWANG_NO_REPITITION_REDUCTION to disable
+    the code that tries to reduce having the same tile appear
+    adjacent to itself in wang-corner-tile mode (e.g. imagine
+    if you were doing something where 90% of things should be
+    the same grass tile, you need to disable this system)
+
+ 6. optionally define STB_HBWANG_MAX_X and STB_HBWANG_MAX_Y
+    to be the max dimensions of the generated map in multiples
+    of the wang tile's short side's length (e.g. if you
+    have 20x10 wang tiles, so short_side_len=10, and you
+    have MAX_X is 17, then the largest map you can generate
+    is 170 pixels wide). The defaults are 100x100. This
+    is used to define static arrays which affect memory
+    usage.
+
+== USING ================================
+
+  To use the map generator, you need a tileset. You can download
+  some sample tilesets from http://nothings.org/gamedev/herringbone
+
+  Then see the "sample application" below.
+
+  You can also use this file to generate templates for
+  tilesets which you then hand-edit to create the data.
+
+
+== MEMORY MANAGEMENT ====================
+
+  The tileset loader allocates memory with malloc(). The map
+  generator does no memory allocation, so e.g. you can load
+  tilesets at startup and never free them and never do any
+  further allocation.
+
+
+== SAMPLE APPLICATION ===================
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+
+#define STB_IMAGE_IMPLEMENTATION
+#include "stb_image.h"        // http://nothings.org/stb_image.c
+
+#define STB_IMAGE_WRITE_IMPLEMENTATION
+#include "stb_image_write.h"  // http://nothings.org/stb/stb_image_write.h
+
+#define STB_HBWANG_IMPLEMENTATION
+#include "stb_hbwang.h"
+
+int main(int argc, char **argv)
+{
+   unsigned char *data;
+   int xs,ys, w,h;
+   stbhw_tileset ts;
+
+   if (argc != 4) {
+      fprintf(stderr, "Usage: mapgen {tile-file} {xsize} {ysize}\n"
+                      "generates file named 'test_map.png'\n");
+      exit(1);
+   }
+   data = stbi_load(argv[1], &w, &h, NULL, 3);
+   xs = atoi(argv[2]);
+   ys = atoi(argv[3]);
+   if (data == NULL) {
+      fprintf(stderr, "Error opening or parsing '%s' as an image file\n", argv[1]);
+      exit(1);
+   }
+   if (xs < 1 || xs > 1000) {
+      fprintf(stderr, "xsize invalid or out of range\n");
+      exit(1);
+   }
+   if (ys < 1 || ys > 1000) {
+      fprintf(stderr, "ysize invalid or out of range\n");
+      exit(1);
+   }
+
+   stbhw_build_tileset_from_image(&ts, data, w*3, w, h);
+   free(data);
+
+   // allocate a buffer to create the final image to
+   data = malloc(3 * xs * ys);
+
+   srand(time(NULL));
+   stbhw_generate_image(&ts, NULL, data, xs*3, xs, ys);
+
+   stbi_write_png("test_map.png", xs, ys, 3, data, xs*3);
+
+   stbhw_free_tileset(&ts);
+   free(data);
+
+   return 0;
+}
+
+== VERSION HISTORY ===================
+
+   0.7   2019-03-04   - fix warnings
+	0.6   2014-08-17   - fix broken map-maker
+	0.5   2014-07-07   - initial release
+
+*/
+
+//////////////////////////////////////////////////////////////////////////////
+//                                                                          //
+//                         HEADER FILE SECTION                              //
+//                                                                          //
+
+#ifndef INCLUDE_STB_HWANG_H
+#define INCLUDE_STB_HWANG_H
+
+#ifdef STB_HBWANG_STATIC
+#define STBHW_EXTERN static
+#else
+#ifdef __cplusplus
+#define STBHW_EXTERN extern "C"
+#else
+#define STBHW_EXTERN extern
+#endif
+#endif
+
+typedef struct stbhw_tileset stbhw_tileset;
+
+// returns description of last error produced by any function (not thread-safe)
+STBHW_EXTERN const char *stbhw_get_last_error(void);
+
+// build a tileset from an image that conforms to a template created by this
+// library. (you allocate storage for stbhw_tileset and function fills it out;
+// memory for individual tiles are malloc()ed).
+// returns non-zero on success, 0 on error
+STBHW_EXTERN int stbhw_build_tileset_from_image(stbhw_tileset *ts,
+                     unsigned char *pixels, int stride_in_bytes, int w, int h);
+
+// free a tileset built by stbhw_build_tileset_from_image
+STBHW_EXTERN void stbhw_free_tileset(stbhw_tileset *ts);
+
+// generate a map that is w * h pixels (3-bytes each)
+// returns non-zero on success, 0 on error
+// not thread-safe (uses a global data structure to avoid memory management)
+// weighting should be NULL, as non-NULL weighting is currently untested
+STBHW_EXTERN int stbhw_generate_image(stbhw_tileset *ts, int **weighting,
+                     unsigned char *pixels, int stride_in_bytes, int w, int h);
+
+//////////////////////////////////////
+//
+// TILESET DATA STRUCTURE
+//
+// if you use the image-to-tileset system from this file, you
+// don't need to worry about these data structures. but if you
+// want to build/load a tileset yourself, you'll need to fill
+// these out.
+
+typedef struct
+{
+   // the edge or vertex constraints, according to diagram below
+   signed char a,b,c,d,e,f;
+
+   // The herringbone wang tile data; it is a bitmap which is either
+   // w=2*short_sidelen,h=short_sidelen, or w=short_sidelen,h=2*short_sidelen.
+   // it is always RGB, stored row-major, with no padding between rows.
+   // (allocate stbhw_tile structure to be large enough for the pixel data)
+   unsigned char pixels[1];
+} stbhw_tile;
+
+struct stbhw_tileset
+{
+   int is_corner;
+   int num_color[6];  // number of colors for each of 6 edge types or 4 corner types
+   int short_side_len;
+   stbhw_tile **h_tiles;
+   stbhw_tile **v_tiles;
+   int num_h_tiles, max_h_tiles;
+   int num_v_tiles, max_v_tiles;
+};
+
+///////////////  TEMPLATE GENERATOR  //////////////////////////
+
+// when requesting a template, you fill out this data
+typedef struct
+{
+   int is_corner;      // using corner colors or edge colors?
+   int short_side_len; // rectangles is 2n x n, n = short_side_len
+   int num_color[6];   // see below diagram for meaning of the index to this;
+                       // 6 values if edge (!is_corner), 4 values if is_corner
+                       // legal numbers: 1..8 if edge, 1..4 if is_corner
+   int num_vary_x;     // additional number of variations along x axis in the template
+   int num_vary_y;     // additional number of variations along y axis in the template
+   int corner_type_color_template[4][4];
+      // if corner_type_color_template[s][t] is non-zero, then any
+      // corner of type s generated as color t will get a little
+      // corner sample markup in the template image data
+
+} stbhw_config;
+
+// computes the size needed for the template image
+STBHW_EXTERN void stbhw_get_template_size(stbhw_config *c, int *w, int *h);
+
+// generates a template image, assuming data is 3*w*h bytes long, RGB format
+STBHW_EXTERN int stbhw_make_template(stbhw_config *c, unsigned char *data, int w, int h, int stride_in_bytes);
+
+#endif//INCLUDE_STB_HWANG_H
+
+
+// TILE CONSTRAINT TYPES
+//
+// there are 4 "types" of corners and 6 types of edges.
+// you can configure the tileset to have different numbers
+// of colors for each type of color or edge.
+//
+// corner types:
+//
+//                     0---*---1---*---2---*---3
+//                     |       |               |
+//                     *       *               *
+//                     |       |               |
+//     1---*---2---*---3       0---*---1---*---2
+//     |               |       |
+//     *               *       *
+//     |               |       |
+//     0---*---1---*---2---*---3
+//
+//
+//  edge types:
+//
+//     *---2---*---3---*      *---0---*
+//     |               |      |       |
+//     1               4      5       1
+//     |               |      |       |
+//     *---0---*---2---*      *       *
+//                            |       |
+//                            4       5
+//                            |       |
+//                            *---3---*
+//
+// TILE CONSTRAINTS
+//
+// each corner/edge has a color; this shows the name
+// of the variable containing the color
+//
+// corner constraints:
+//
+//                        a---*---d
+//                        |       |
+//                        *       *
+//                        |       |
+//     a---*---b---*---c  b       e
+//     |               |  |       |
+//     *               *  *       *
+//     |               |  |       |
+//     d---*---e---*---f  c---*---f
+//
+//
+//  edge constraints:
+//
+//     *---a---*---b---*      *---a---*
+//     |               |      |       |
+//     c               d      b       c
+//     |               |      |       |
+//     *---e---*---f---*      *       *
+//                            |       |
+//                            d       e
+//                            |       |
+//                            *---f---*
+//
+
+
+//////////////////////////////////////////////////////////////////////////////
+//                                                                          //
+//                       IMPLEMENTATION SECTION                             //
+//                                                                          //
+
+#ifdef STB_HERRINGBONE_WANG_TILE_IMPLEMENTATION
+
+
+#include <string.h> // memcpy
+#include <stdlib.h> // malloc
+
+#ifndef STB_HBWANG_RAND
+#include <stdlib.h>
+#define STB_HBWANG_RAND()  (rand() >> 4)
+#endif
+
+#ifndef STB_HBWANG_ASSERT
+#include <assert.h>
+#define STB_HBWANG_ASSERT(x)  assert(x)
+#endif
+
+// map size
+#ifndef STB_HBWANG_MAX_X
+#define STB_HBWANG_MAX_X  100
+#endif
+
+#ifndef STB_HBWANG_MAX_Y
+#define STB_HBWANG_MAX_Y  100
+#endif
+
+// global variables for color assignments
+// @MEMORY change these to just store last two/three rows
+//         and keep them on the stack
+static signed char c_color[STB_HBWANG_MAX_Y+6][STB_HBWANG_MAX_X+6];
+static signed char v_color[STB_HBWANG_MAX_Y+6][STB_HBWANG_MAX_X+5];
+static signed char h_color[STB_HBWANG_MAX_Y+5][STB_HBWANG_MAX_X+6];
+
+static const char *stbhw_error;
+STBHW_EXTERN const char *stbhw_get_last_error(void)
+{
+   const char *temp = stbhw_error;
+   stbhw_error = 0;
+   return temp;
+}
+
+
+
+
+/////////////////////////////////////////////////////////////
+//
+//  SHARED TEMPLATE-DESCRIPTION CODE
+//
+//  Used by both template generator and tileset parser; by
+//  using the same code, they are locked in sync and we don't
+//  need to try to do more sophisticated parsing of edge color
+//  markup or something.
+
+typedef void stbhw__process_rect(struct stbhw__process *p, int xpos, int ypos,
+                                 int a, int b, int c, int d, int e, int f);
+
+typedef struct stbhw__process
+{
+   stbhw_tileset *ts;
+   stbhw_config *c;
+   stbhw__process_rect *process_h_rect;
+   stbhw__process_rect *process_v_rect;
+   unsigned char *data;
+   int stride,w,h;
+} stbhw__process;
+
+static void stbhw__process_h_row(stbhw__process *p,
+                           int xpos, int ypos,
+                           int a0, int a1,
+                           int b0, int b1,
+                           int c0, int c1,
+                           int d0, int d1,
+                           int e0, int e1,
+                           int f0, int f1,
+                           int variants)
+{
+   int a,b,c,d,e,f,v;
+
+   for (v=0; v < variants; ++v)
+      for (f=f0; f <= f1; ++f)
+         for (e=e0; e <= e1; ++e)
+            for (d=d0; d <= d1; ++d)
+               for (c=c0; c <= c1; ++c)
+                  for (b=b0; b <= b1; ++b)
+                     for (a=a0; a <= a1; ++a) {
+                        p->process_h_rect(p, xpos, ypos, a,b,c,d,e,f);
+                        xpos += 2*p->c->short_side_len + 3;
+                     }
+}
+
+static void stbhw__process_v_row(stbhw__process *p,
+                           int xpos, int ypos,
+                           int a0, int a1,
+                           int b0, int b1,
+                           int c0, int c1,
+                           int d0, int d1,
+                           int e0, int e1,
+                           int f0, int f1,
+                           int variants)
+{
+   int a,b,c,d,e,f,v;
+
+   for (v=0; v < variants; ++v)
+      for (f=f0; f <= f1; ++f)
+         for (e=e0; e <= e1; ++e)
+            for (d=d0; d <= d1; ++d)
+               for (c=c0; c <= c1; ++c)
+                  for (b=b0; b <= b1; ++b)
+                     for (a=a0; a <= a1; ++a) {
+                        p->process_v_rect(p, xpos, ypos, a,b,c,d,e,f);
+                        xpos += p->c->short_side_len+3;
+                     }
+}
+
+static void stbhw__get_template_info(stbhw_config *c, int *w, int *h, int *h_count, int *v_count)
+{
+   int size_x,size_y;
+   int horz_count,vert_count;
+
+   if (c->is_corner) {
+      int horz_w = c->num_color[1] * c->num_color[2] * c->num_color[3] * c->num_vary_x;
+      int horz_h = c->num_color[0] * c->num_color[1] * c->num_color[2] * c->num_vary_y;
+
+      int vert_w = c->num_color[0] * c->num_color[3] * c->num_color[2] * c->num_vary_y;
+      int vert_h = c->num_color[1] * c->num_color[0] * c->num_color[3] * c->num_vary_x;
+
+      int horz_x = horz_w * (2*c->short_side_len + 3);
+      int horz_y = horz_h * (  c->short_side_len + 3);
+
+      int vert_x = vert_w * (  c->short_side_len + 3);
+      int vert_y = vert_h * (2*c->short_side_len + 3);
+
+      horz_count = horz_w * horz_h;
+      vert_count = vert_w * vert_h;
+
+      size_x = horz_x > vert_x ? horz_x : vert_x;
+      size_y = 2 + horz_y + 2 + vert_y;
+   } else {
+      int horz_w = c->num_color[0] * c->num_color[1] * c->num_color[2] * c->num_vary_x;
+      int horz_h = c->num_color[3] * c->num_color[4] * c->num_color[2] * c->num_vary_y;
+
+      int vert_w = c->num_color[0] * c->num_color[5] * c->num_color[1] * c->num_vary_y;
+      int vert_h = c->num_color[3] * c->num_color[4] * c->num_color[5] * c->num_vary_x;
+
+      int horz_x = horz_w * (2*c->short_side_len + 3);
+      int horz_y = horz_h * (  c->short_side_len + 3);
+
+      int vert_x = vert_w * (  c->short_side_len + 3);
+      int vert_y = vert_h * (2*c->short_side_len + 3);
+
+      horz_count = horz_w * horz_h;
+      vert_count = vert_w * vert_h;
+
+      size_x = horz_x > vert_x ? horz_x : vert_x;
+      size_y = 2 + horz_y + 2 + vert_y;
+   }
+   if (w) *w = size_x;
+   if (h) *h = size_y;
+   if (h_count) *h_count = horz_count;
+   if (v_count) *v_count = vert_count;
+}
+
+STBHW_EXTERN void stbhw_get_template_size(stbhw_config *c, int *w, int *h)
+{
+   stbhw__get_template_info(c, w, h, NULL, NULL);
+}
+
+static int stbhw__process_template(stbhw__process *p)
+{
+   int i,j,k,q, ypos;
+   int size_x, size_y;
+   stbhw_config *c = p->c;
+
+   stbhw__get_template_info(c, &size_x, &size_y, NULL, NULL);
+
+   if (p->w < size_x || p->h < size_y) {
+      stbhw_error = "image too small for configuration";
+      return 0;
+   }
+
+   if (c->is_corner) {
+      ypos = 2;
+      for (k=0; k < c->num_color[2]; ++k) {
+         for (j=0; j < c->num_color[1]; ++j) {
+            for (i=0; i < c->num_color[0]; ++i) {
+               for (q=0; q < c->num_vary_y; ++q) {
+                  stbhw__process_h_row(p, 0,ypos,
+                     0,c->num_color[1]-1, 0,c->num_color[2]-1, 0,c->num_color[3]-1,
+                     i,i, j,j, k,k,
+                     c->num_vary_x);
+                  ypos += c->short_side_len + 3;
+               }
+            }
+         }
+      }
+      ypos += 2;
+      for (k=0; k < c->num_color[3]; ++k) {
+         for (j=0; j < c->num_color[0]; ++j) {
+            for (i=0; i < c->num_color[1]; ++i) {
+               for (q=0; q < c->num_vary_x; ++q) {
+                  stbhw__process_v_row(p, 0,ypos,
+                     0,c->num_color[0]-1, 0,c->num_color[3]-1, 0,c->num_color[2]-1,
+                     i,i, j,j, k,k,
+                     c->num_vary_y);
+                  ypos += (c->short_side_len*2) + 3;
+               }
+            }
+         }
+      }
+      assert(ypos == size_y);
+   } else {
+      ypos = 2;
+      for (k=0; k < c->num_color[3]; ++k) {
+         for (j=0; j < c->num_color[4]; ++j) {
+            for (i=0; i < c->num_color[2]; ++i) {
+               for (q=0; q < c->num_vary_y; ++q) {
+                  stbhw__process_h_row(p, 0,ypos,
+                     0,c->num_color[2]-1, k,k,
+                     0,c->num_color[1]-1, j,j,
+                     0,c->num_color[0]-1, i,i,
+                     c->num_vary_x);
+                  ypos += c->short_side_len + 3;
+               }
+            }
+         }
+      }
+      ypos += 2;
+      for (k=0; k < c->num_color[3]; ++k) {
+         for (j=0; j < c->num_color[4]; ++j) {
+            for (i=0; i < c->num_color[5]; ++i) {
+               for (q=0; q < c->num_vary_x; ++q) {
+                  stbhw__process_v_row(p, 0,ypos,
+                     0,c->num_color[0]-1, i,i,
+                     0,c->num_color[1]-1, j,j,
+                     0,c->num_color[5]-1, k,k,
+                     c->num_vary_y);
+                  ypos += (c->short_side_len*2) + 3;
+               }
+            }
+         }
+      }
+      assert(ypos == size_y);
+   }
+   return 1;
+}
+
+
+/////////////////////////////////////////////////////////////
+//
+//  MAP GENERATOR
+//
+
+static void stbhw__draw_pixel(unsigned char *output, int stride, int x, int y, unsigned char c[3])
+{
+   memcpy(output + y*stride + x*3, c, 3);
+}
+
+static void stbhw__draw_h_tile(unsigned char *output, int stride, int xmax, int ymax, int x, int y, stbhw_tile *h, int sz)
+{
+   int i,j;
+   for (j=0; j < sz; ++j)
+      if (y+j >= 0 && y+j < ymax)
+         for (i=0; i < sz*2; ++i)
+            if (x+i >= 0 && x+i < xmax)
+               stbhw__draw_pixel(output,stride, x+i,y+j, &h->pixels[(j*sz*2 + i)*3]);
+}
+
+static void stbhw__draw_v_tile(unsigned char *output, int stride, int xmax, int ymax, int x, int y, stbhw_tile *h, int sz)
+{
+   int i,j;
+   for (j=0; j < sz*2; ++j)
+      if (y+j >= 0 && y+j < ymax)
+         for (i=0; i < sz; ++i)
+            if (x+i >= 0 && x+i < xmax)
+               stbhw__draw_pixel(output,stride, x+i,y+j, &h->pixels[(j*sz + i)*3]);
+}
+
+
+// randomly choose a tile that fits constraints for a given spot, and update the constraints
+static stbhw_tile * stbhw__choose_tile(stbhw_tile **list, int numlist,
+                                      signed char *a, signed char *b, signed char *c,
+                                      signed char *d, signed char *e, signed char *f,
+                                      int **weighting)
+{
+   int i,n,m = 1<<30,pass;
+   for (pass=0; pass < 2; ++pass) {
+      n=0;
+      // pass #1:
+      //   count number of variants that match this partial set of constraints
+      // pass #2:
+      //   stop on randomly selected match
+      for (i=0; i < numlist; ++i) {
+         stbhw_tile *h = list[i];
+         if ((*a < 0 || *a == h->a) &&
+             (*b < 0 || *b == h->b) &&
+             (*c < 0 || *c == h->c) &&
+             (*d < 0 || *d == h->d) &&
+             (*e < 0 || *e == h->e) &&
+             (*f < 0 || *f == h->f)) {
+            if (weighting)
+               n += weighting[0][i];
+            else
+               n += 1;
+            if (n > m) {
+               // use list[i]
+               // update constraints to reflect what we placed
+               *a = h->a;
+               *b = h->b;
+               *c = h->c;
+               *d = h->d;
+               *e = h->e;
+               *f = h->f;
+               return h;
+            }
+         }
+      }
+      if (n == 0) {
+         stbhw_error = "couldn't find tile matching constraints";
+         return NULL;
+      }
+      m = STB_HBWANG_RAND() % n;
+   }
+   STB_HBWANG_ASSERT(0);
+   return NULL;
+}
+
+static int stbhw__match(int x, int y)
+{
+   return c_color[y][x] == c_color[y+1][x+1];
+}
+
+static int stbhw__weighted(int num_options, int *weights)
+{
+   int k, total, choice;
+   total = 0;
+   for (k=0; k < num_options; ++k)
+      total += weights[k];
+   choice = STB_HBWANG_RAND() % total;
+   total = 0;
+   for (k=0; k < num_options; ++k) {
+      total += weights[k];
+      if (choice < total)
+         break;
+   }
+   STB_HBWANG_ASSERT(k < num_options);
+   return k;
+}
+
+static int stbhw__change_color(int old_color, int num_options, int *weights)
+{
+   if (weights) {
+      int k, total, choice;
+      total = 0;
+      for (k=0; k < num_options; ++k)
+         if (k != old_color)
+            total += weights[k];
+      choice = STB_HBWANG_RAND() % total;
+      total = 0;
+      for (k=0; k < num_options; ++k) {
+         if (k != old_color) {
+            total += weights[k];
+            if (choice < total)
+               break;
+         }
+      }
+      STB_HBWANG_ASSERT(k < num_options);
+      return k;
+   } else {
+      int offset = 1+STB_HBWANG_RAND() % (num_options-1);
+      return (old_color+offset) % num_options;
+   }
+}
+
+
+
+// generate a map that is w * h pixels (3-bytes each)
+// returns 1 on success, 0 on error
+STBHW_EXTERN int stbhw_generate_image(stbhw_tileset *ts, int **weighting, unsigned char *output, int stride, int w, int h)
+{
+   int sidelen = ts->short_side_len;
+   int xmax = (w / sidelen) + 6;
+   int ymax = (h / sidelen) + 6;
+   if (xmax > STB_HBWANG_MAX_X+6 || ymax > STB_HBWANG_MAX_Y+6) {
+      stbhw_error = "increase STB_HBWANG_MAX_X/Y";
+      return 0;
+   }
+
+   if (ts->is_corner) {
+      int i,j, ypos;
+      int *cc = ts->num_color;
+
+      for (j=0; j < ymax; ++j) {
+         for (i=0; i < xmax; ++i) {
+            int p = (i-j+1)&3; // corner type
+            if (weighting==NULL || weighting[p]==0 || cc[p] == 1)
+               c_color[j][i] = STB_HBWANG_RAND() % cc[p];
+            else
+               c_color[j][i] = stbhw__weighted(cc[p], weighting[p]);
+         }
+      }
+      #ifndef STB_HBWANG_NO_REPITITION_REDUCTION
+      // now go back through and make sure we don't have adjancent 3x2 vertices that are identical,
+      // to avoid really obvious repetition (which happens easily with extreme weights)
+      for (j=0; j < ymax-3; ++j) {
+         for (i=0; i < xmax-3; ++i) {
+            //int p = (i-j+1) & 3; // corner type   // unused, not sure what the intent was so commenting it out
+            STB_HBWANG_ASSERT(i+3 < STB_HBWANG_MAX_X+6);
+            STB_HBWANG_ASSERT(j+3 < STB_HBWANG_MAX_Y+6);
+            if (stbhw__match(i,j) && stbhw__match(i,j+1) && stbhw__match(i,j+2)
+                && stbhw__match(i+1,j) && stbhw__match(i+1,j+1) && stbhw__match(i+1,j+2)) {
+               int p = ((i+1)-(j+1)+1) & 3;
+               if (cc[p] > 1)
+                  c_color[j+1][i+1] = stbhw__change_color(c_color[j+1][i+1], cc[p], weighting ? weighting[p] : NULL);
+            }
+            if (stbhw__match(i,j) && stbhw__match(i+1,j) && stbhw__match(i+2,j)
+                && stbhw__match(i,j+1) && stbhw__match(i+1,j+1) && stbhw__match(i+2,j+1)) {
+               int p = ((i+2)-(j+1)+1) & 3;
+               if (cc[p] > 1)
+                  c_color[j+1][i+2] = stbhw__change_color(c_color[j+1][i+2], cc[p], weighting ? weighting[p] : NULL);
+            }
+         }
+      }
+      #endif
+
+      ypos = -1 * sidelen;
+      for (j = -1; ypos < h; ++j) {
+         // a general herringbone row consists of:
+         //    horizontal left block, the bottom of a previous vertical, the top of a new vertical
+         int phase = (j & 3);
+         // displace horizontally according to pattern
+         if (phase == 0) {
+            i = 0;
+         } else {
+            i = phase-4;
+         }
+         for (;; i += 4) {
+            int xpos = i * sidelen;
+            if (xpos >= w)
+               break;
+            // horizontal left-block
+            if (xpos + sidelen*2 >= 0 && ypos >= 0) {
+               stbhw_tile *t = stbhw__choose_tile(
+                  ts->h_tiles, ts->num_h_tiles,
+                  &c_color[j+2][i+2], &c_color[j+2][i+3], &c_color[j+2][i+4],
+                  &c_color[j+3][i+2], &c_color[j+3][i+3], &c_color[j+3][i+4],
+                  weighting
+               );
+               if (t == NULL)
+                  return 0;
+               stbhw__draw_h_tile(output,stride,w,h, xpos, ypos, t, sidelen);
+            }
+            xpos += sidelen * 2;
+            // now we're at the end of a previous vertical one
+            xpos += sidelen;
+            // now we're at the start of a new vertical one
+            if (xpos < w) {
+               stbhw_tile *t = stbhw__choose_tile(
+                  ts->v_tiles, ts->num_v_tiles,
+                  &c_color[j+2][i+5], &c_color[j+3][i+5], &c_color[j+4][i+5],
+                  &c_color[j+2][i+6], &c_color[j+3][i+6], &c_color[j+4][i+6],
+                  weighting
+               );
+               if (t == NULL)
+                  return 0;
+               stbhw__draw_v_tile(output,stride,w,h, xpos, ypos,  t, sidelen);
+            }
+         }
+         ypos += sidelen;
+      }
+   } else {
+      // @TODO edge-color repetition reduction
+      int i,j, ypos;
+      memset(v_color, -1, sizeof(v_color));
+      memset(h_color, -1, sizeof(h_color));
+
+      ypos = -1 * sidelen;
+      for (j = -1; ypos<h; ++j) {
+         // a general herringbone row consists of:
+         //    horizontal left block, the bottom of a previous vertical, the top of a new vertical
+         int phase = (j & 3);
+         // displace horizontally according to pattern
+         if (phase == 0) {
+            i = 0;
+         } else {
+            i = phase-4;
+         }
+         for (;; i += 4) {
+            int xpos = i * sidelen;
+            if (xpos >= w)
+               break;
+            // horizontal left-block
+            if (xpos + sidelen*2 >= 0 && ypos >= 0) {
+               stbhw_tile *t = stbhw__choose_tile(
+                  ts->h_tiles, ts->num_h_tiles,
+                  &h_color[j+2][i+2], &h_color[j+2][i+3],
+                  &v_color[j+2][i+2], &v_color[j+2][i+4],
+                  &h_color[j+3][i+2], &h_color[j+3][i+3],
+                  weighting
+               );
+               if (t == NULL) return 0;
+               stbhw__draw_h_tile(output,stride,w,h, xpos, ypos, t, sidelen);
+            }
+            xpos += sidelen * 2;
+            // now we're at the end of a previous vertical one
+            xpos += sidelen;
+            // now we're at the start of a new vertical one
+            if (xpos < w) {
+               stbhw_tile *t = stbhw__choose_tile(
+                  ts->v_tiles, ts->num_v_tiles,
+                  &h_color[j+2][i+5],
+                  &v_color[j+2][i+5], &v_color[j+2][i+6],
+                  &v_color[j+3][i+5], &v_color[j+3][i+6],
+                  &h_color[j+4][i+5],
+                  weighting
+               );
+               if (t == NULL) return 0;
+               stbhw__draw_v_tile(output,stride,w,h, xpos, ypos,  t, sidelen);
+            }
+         }
+         ypos += sidelen;
+      }
+   }
+   return 1;
+}
+
+static void stbhw__parse_h_rect(stbhw__process *p, int xpos, int ypos,
+                            int a, int b, int c, int d, int e, int f)
+{
+   int len = p->c->short_side_len;
+   stbhw_tile *h = (stbhw_tile *) malloc(sizeof(*h)-1 + 3 * (len*2) * len);
+   int i,j;
+   ++xpos;
+   ++ypos;
+   h->a = a, h->b = b, h->c = c, h->d = d, h->e = e, h->f = f;
+   for (j=0; j < len; ++j)
+      for (i=0; i < len*2; ++i)
+         memcpy(h->pixels + j*(3*len*2) + i*3, p->data+(ypos+j)*p->stride+(xpos+i)*3, 3);
+   STB_HBWANG_ASSERT(p->ts->num_h_tiles < p->ts->max_h_tiles);
+   p->ts->h_tiles[p->ts->num_h_tiles++] = h;
+}
+
+static void stbhw__parse_v_rect(stbhw__process *p, int xpos, int ypos,
+                            int a, int b, int c, int d, int e, int f)
+{
+   int len = p->c->short_side_len;
+   stbhw_tile *h = (stbhw_tile *) malloc(sizeof(*h)-1 + 3 * (len*2) * len);
+   int i,j;
+   ++xpos;
+   ++ypos;
+   h->a = a, h->b = b, h->c = c, h->d = d, h->e = e, h->f = f;
+   for (j=0; j < len*2; ++j)
+      for (i=0; i < len; ++i)
+         memcpy(h->pixels + j*(3*len) + i*3, p->data+(ypos+j)*p->stride+(xpos+i)*3, 3);
+   STB_HBWANG_ASSERT(p->ts->num_v_tiles < p->ts->max_v_tiles);
+   p->ts->v_tiles[p->ts->num_v_tiles++] = h;
+}
+
+STBHW_EXTERN int stbhw_build_tileset_from_image(stbhw_tileset *ts, unsigned char *data, int stride, int w, int h)
+{
+   int i, h_count, v_count;
+   unsigned char header[9];
+   stbhw_config c = { 0 };
+   stbhw__process p = { 0 };
+
+   // extract binary header
+
+   // remove encoding that makes it more visually obvious it encodes actual data
+   for (i=0; i < 9; ++i)
+      header[i] = data[w*3 - 1 - i] ^ (i*55);
+
+   // extract header info
+   if (header[7] == 0xc0) {
+      // corner-type
+      c.is_corner = 1;
+      for (i=0; i < 4; ++i)
+         c.num_color[i] = header[i];
+      c.num_vary_x = header[4];
+      c.num_vary_y = header[5];
+      c.short_side_len = header[6];
+   } else {
+      c.is_corner = 0;
+      // edge-type
+      for (i=0; i < 6; ++i)
+         c.num_color[i] = header[i];
+      c.num_vary_x = header[6];
+      c.num_vary_y = header[7];
+      c.short_side_len = header[8];
+   }
+
+   if (c.num_vary_x < 0 || c.num_vary_x > 64 || c.num_vary_y < 0 || c.num_vary_y > 64)
+      return 0;
+   if (c.short_side_len == 0)
+      return 0;
+   if (c.num_color[0] > 32 || c.num_color[1] > 32 || c.num_color[2] > 32 || c.num_color[3] > 32)
+      return 0;
+
+   stbhw__get_template_info(&c, NULL, NULL, &h_count, &v_count);
+
+   ts->is_corner = c.is_corner;
+   ts->short_side_len = c.short_side_len;
+   memcpy(ts->num_color, c.num_color, sizeof(ts->num_color));
+
+   ts->max_h_tiles = h_count;
+   ts->max_v_tiles = v_count;
+
+   ts->num_h_tiles = ts->num_v_tiles = 0;
+
+   ts->h_tiles = (stbhw_tile **) malloc(sizeof(*ts->h_tiles) * h_count);
+   ts->v_tiles = (stbhw_tile **) malloc(sizeof(*ts->v_tiles) * v_count);
+
+   p.ts = ts;
+   p.data = data;
+   p.stride = stride;
+   p.process_h_rect = stbhw__parse_h_rect;
+   p.process_v_rect = stbhw__parse_v_rect;
+   p.w = w;
+   p.h = h;
+   p.c = &c;
+
+   // load all the tiles out of the image
+   return stbhw__process_template(&p);
+}
+
+STBHW_EXTERN void stbhw_free_tileset(stbhw_tileset *ts)
+{
+   int i;
+   for (i=0; i < ts->num_h_tiles; ++i)
+      free(ts->h_tiles[i]);
+   for (i=0; i < ts->num_v_tiles; ++i)
+      free(ts->v_tiles[i]);
+   free(ts->h_tiles);
+   free(ts->v_tiles);
+   ts->h_tiles = NULL;
+   ts->v_tiles = NULL;
+   ts->num_h_tiles = ts->max_h_tiles = 0;
+   ts->num_v_tiles = ts->max_v_tiles = 0;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//               GENERATOR
+//
+//
+
+
+// shared code
+
+static void stbhw__set_pixel(unsigned char *data, int stride, int xpos, int ypos, unsigned char color[3])
+{
+   memcpy(data + ypos*stride + xpos*3, color, 3);
+}
+
+static void stbhw__stbhw__set_pixel_whiten(unsigned char *data, int stride, int xpos, int ypos, unsigned char color[3])
+{
+   unsigned char c2[3];
+   int i;
+   for (i=0; i < 3; ++i)
+      c2[i] = (color[i]*2 + 255)/3;
+   memcpy(data + ypos*stride + xpos*3, c2, 3);
+}
+
+
+static unsigned char stbhw__black[3] = { 0,0,0 };
+
+// each edge set gets its own unique color variants
+// used http://phrogz.net/css/distinct-colors.html to generate this set,
+// but it's not very good and needs to be revised
+
+static unsigned char stbhw__color[7][8][3] =
+{
+   { {255,51,51}  , {143,143,29}, {0,199,199}, {159,119,199},     {0,149,199}  , {143, 0,143}, {255,128,0}, {64,255,0},  },
+   { {235,255,30 }, {255,0,255},  {199,139,119},  {29,143, 57},    {143,0,71}   , { 0,143,143}, {0,99,199}, {143,71,0},  },
+   { {0,149,199}  , {143, 0,143}, {255,128,0}, {64,255,0},        {255,191,0}  , {51,255,153}, {0,0,143}, {199,119,159},},
+   { {143,0,71}   , { 0,143,143}, {0,99,199}, {143,71,0},         {255,190,153}, { 0,255,255}, {128,0,255}, {255,51,102},},
+   { {255,191,0}  , {51,255,153}, {0,0,143}, {199,119,159},       {255,51,51}  , {143,143,29}, {0,199,199}, {159,119,199},},
+   { {255,190,153}, { 0,255,255}, {128,0,255}, {255,51,102},      {235,255,30 }, {255,0,255}, {199,139,119},  {29,143, 57}, },
+
+   { {40,40,40 },  { 90,90,90 }, { 150,150,150 }, { 200,200,200 },
+     { 255,90,90 }, { 160,160,80}, { 50,150,150 }, { 200,50,200 } },
+};
+
+static void stbhw__draw_hline(unsigned char *data, int stride, int xpos, int ypos, int color, int len, int slot)
+{
+   int i;
+   int j = len * 6 / 16;
+   int k = len * 10 / 16;
+   for (i=0; i < len; ++i)
+      stbhw__set_pixel(data, stride, xpos+i, ypos, stbhw__black);
+   if (k-j < 2) {
+      j = len/2 - 1;
+      k = j+2;
+      if (len & 1)
+         ++k;
+   }
+   for (i=j; i < k; ++i)
+      stbhw__stbhw__set_pixel_whiten(data, stride, xpos+i, ypos, stbhw__color[slot][color]);
+}
+
+static void stbhw__draw_vline(unsigned char *data, int stride, int xpos, int ypos, int color, int len, int slot)
+{
+   int i;
+   int j = len * 6 / 16;
+   int k = len * 10 / 16;
+   for (i=0; i < len; ++i)
+      stbhw__set_pixel(data, stride, xpos, ypos+i, stbhw__black);
+   if (k-j < 2) {
+      j = len/2 - 1;
+      k = j+2;
+      if (len & 1)
+         ++k;
+   }
+   for (i=j; i < k; ++i)
+      stbhw__stbhw__set_pixel_whiten(data, stride, xpos, ypos+i, stbhw__color[slot][color]);
+}
+
+//                 0--*--1--*--2--*--3
+//                 |     |           |
+//                 *     *           *
+//                 |     |           |
+//     1--*--2--*--3     0--*--1--*--2
+//     |           |     |
+//     *           *     *
+//     |           |     |
+//     0--*--1--*--2--*--3
+//
+// variables while enumerating (no correspondence between corners
+// of the types is implied by these variables)
+//
+//     a-----b-----c      a-----d
+//     |           |      |     |
+//     |           |      |     |
+//     |           |      |     |
+//     d-----e-----f      b     e
+//                        |     |
+//                        |     |
+//                        |     |
+//                        c-----f
+//
+
+unsigned char stbhw__corner_colors[4][4][3] =
+{
+   { { 255,0,0 }, { 200,200,200 }, { 100,100,200 }, { 255,200,150 }, },
+   { { 0,0,255 }, { 255,255,0 },   { 100,200,100 }, { 150,255,200 }, },
+   { { 255,0,255 }, { 80,80,80 },  { 200,100,100 }, { 200,150,255 }, },
+   { { 0,255,255 }, { 0,255,0 },   { 200,120,200 }, { 255,200,200 }, },
+};
+
+int stbhw__corner_colors_to_edge_color[4][4] =
+{
+   // 0   1   2   3
+   {  0,  1,  4,  9, }, // 0
+   {  2,  3,  5, 10, }, // 1
+   {  6,  7,  8, 11, }, // 2
+   { 12, 13, 14, 15, }, // 3
+};
+
+#define stbhw__c2e stbhw__corner_colors_to_edge_color
+
+static void stbhw__draw_clipped_corner(unsigned char *data, int stride, int xpos, int ypos, int w, int h, int x, int y)
+{
+   static unsigned char template_color[3] = { 167,204,204 };
+   int i,j;
+   for (j = -2; j <= 1; ++j) {
+      for (i = -2; i <= 1; ++i) {
+         if ((i == -2 || i == 1) && (j == -2 || j == 1))
+            continue;
+         else {
+            if (x+i < 1 || x+i > w) continue;
+            if (y+j < 1 || y+j > h) continue;
+            stbhw__set_pixel(data, stride, xpos+x+i, ypos+y+j, template_color);
+
+         }
+      }
+   }
+}
+
+static void stbhw__edge_process_h_rect(stbhw__process *p, int xpos, int ypos,
+                            int a, int b, int c, int d, int e, int f)
+{
+   int len = p->c->short_side_len;
+   stbhw__draw_hline(p->data, p->stride, xpos+1        , ypos        , a, len, 2);
+   stbhw__draw_hline(p->data, p->stride, xpos+  len+1  , ypos        , b, len, 3);
+   stbhw__draw_vline(p->data, p->stride, xpos          , ypos+1      , c, len, 1);
+   stbhw__draw_vline(p->data, p->stride, xpos+2*len+1  , ypos+1      , d, len, 4);
+   stbhw__draw_hline(p->data, p->stride, xpos+1        , ypos + len+1, e, len, 0);
+   stbhw__draw_hline(p->data, p->stride, xpos + len+1  , ypos + len+1, f, len, 2);
+}
+
+static void stbhw__edge_process_v_rect(stbhw__process *p, int xpos, int ypos,
+                            int a, int b, int c, int d, int e, int f)
+{
+   int len = p->c->short_side_len;
+   stbhw__draw_hline(p->data, p->stride, xpos+1      , ypos          , a, len, 0);
+   stbhw__draw_vline(p->data, p->stride, xpos        , ypos+1        , b, len, 5);
+   stbhw__draw_vline(p->data, p->stride, xpos + len+1, ypos+1        , c, len, 1);
+   stbhw__draw_vline(p->data, p->stride, xpos        , ypos +   len+1, d, len, 4);
+   stbhw__draw_vline(p->data, p->stride, xpos + len+1, ypos +   len+1, e, len, 5);
+   stbhw__draw_hline(p->data, p->stride, xpos+1      , ypos + 2*len+1, f, len, 3);
+}
+
+static void stbhw__corner_process_h_rect(stbhw__process *p, int xpos, int ypos,
+                            int a, int b, int c, int d, int e, int f)
+{
+   int len = p->c->short_side_len;
+
+   stbhw__draw_hline(p->data, p->stride, xpos+1        , ypos        , stbhw__c2e[a][b], len, 2);
+   stbhw__draw_hline(p->data, p->stride, xpos+  len+1  , ypos        , stbhw__c2e[b][c], len, 3);
+   stbhw__draw_vline(p->data, p->stride, xpos          , ypos+1      , stbhw__c2e[a][d], len, 1);
+   stbhw__draw_vline(p->data, p->stride, xpos+2*len+1  , ypos+1      , stbhw__c2e[c][f], len, 4);
+   stbhw__draw_hline(p->data, p->stride, xpos+1        , ypos + len+1, stbhw__c2e[d][e], len, 0);
+   stbhw__draw_hline(p->data, p->stride, xpos + len+1  , ypos + len+1, stbhw__c2e[e][f], len, 2);
+
+   if (p->c->corner_type_color_template[1][a]) stbhw__draw_clipped_corner(p->data,p->stride, xpos,ypos, len*2,len, 1,1);
+   if (p->c->corner_type_color_template[2][b]) stbhw__draw_clipped_corner(p->data,p->stride, xpos,ypos, len*2,len, len+1,1);
+   if (p->c->corner_type_color_template[3][c]) stbhw__draw_clipped_corner(p->data,p->stride, xpos,ypos, len*2,len, len*2+1,1);
+
+   if (p->c->corner_type_color_template[0][d]) stbhw__draw_clipped_corner(p->data,p->stride, xpos,ypos, len*2,len, 1,len+1);
+   if (p->c->corner_type_color_template[1][e]) stbhw__draw_clipped_corner(p->data,p->stride, xpos,ypos, len*2,len, len+1,len+1);
+   if (p->c->corner_type_color_template[2][f]) stbhw__draw_clipped_corner(p->data,p->stride, xpos,ypos, len*2,len, len*2+1,len+1);
+
+   stbhw__set_pixel(p->data, p->stride, xpos        , ypos, stbhw__corner_colors[1][a]);
+   stbhw__set_pixel(p->data, p->stride, xpos+len    , ypos, stbhw__corner_colors[2][b]);
+   stbhw__set_pixel(p->data, p->stride, xpos+2*len+1, ypos, stbhw__corner_colors[3][c]);
+   stbhw__set_pixel(p->data, p->stride, xpos        , ypos+len+1, stbhw__corner_colors[0][d]);
+   stbhw__set_pixel(p->data, p->stride, xpos+len    , ypos+len+1, stbhw__corner_colors[1][e]);
+   stbhw__set_pixel(p->data, p->stride, xpos+2*len+1, ypos+len+1, stbhw__corner_colors[2][f]);
+}
+
+static void stbhw__corner_process_v_rect(stbhw__process *p, int xpos, int ypos,
+                            int a, int b, int c, int d, int e, int f)
+{
+   int len = p->c->short_side_len;
+
+   stbhw__draw_hline(p->data, p->stride, xpos+1      , ypos          , stbhw__c2e[a][d], len, 0);
+   stbhw__draw_vline(p->data, p->stride, xpos        , ypos+1        , stbhw__c2e[a][b], len, 5);
+   stbhw__draw_vline(p->data, p->stride, xpos + len+1, ypos+1        , stbhw__c2e[d][e], len, 1);
+   stbhw__draw_vline(p->data, p->stride, xpos        , ypos +   len+1, stbhw__c2e[b][c], len, 4);
+   stbhw__draw_vline(p->data, p->stride, xpos + len+1, ypos +   len+1, stbhw__c2e[e][f], len, 5);
+   stbhw__draw_hline(p->data, p->stride, xpos+1      , ypos + 2*len+1, stbhw__c2e[c][f], len, 3);
+
+   if (p->c->corner_type_color_template[0][a]) stbhw__draw_clipped_corner(p->data,p->stride, xpos,ypos, len,len*2, 1,1);
+   if (p->c->corner_type_color_template[3][b]) stbhw__draw_clipped_corner(p->data,p->stride, xpos,ypos, len,len*2, 1,len+1);
+   if (p->c->corner_type_color_template[2][c]) stbhw__draw_clipped_corner(p->data,p->stride, xpos,ypos, len,len*2, 1,len*2+1);
+
+   if (p->c->corner_type_color_template[1][d]) stbhw__draw_clipped_corner(p->data,p->stride, xpos,ypos, len,len*2, len+1,1);
+   if (p->c->corner_type_color_template[0][e]) stbhw__draw_clipped_corner(p->data,p->stride, xpos,ypos, len,len*2, len+1,len+1);
+   if (p->c->corner_type_color_template[3][f]) stbhw__draw_clipped_corner(p->data,p->stride, xpos,ypos, len,len*2, len+1,len*2+1);
+
+   stbhw__set_pixel(p->data, p->stride, xpos      , ypos        , stbhw__corner_colors[0][a]);
+   stbhw__set_pixel(p->data, p->stride, xpos      , ypos+len    , stbhw__corner_colors[3][b]);
+   stbhw__set_pixel(p->data, p->stride, xpos      , ypos+2*len+1, stbhw__corner_colors[2][c]);
+   stbhw__set_pixel(p->data, p->stride, xpos+len+1, ypos        , stbhw__corner_colors[1][d]);
+   stbhw__set_pixel(p->data, p->stride, xpos+len+1, ypos+len    , stbhw__corner_colors[0][e]);
+   stbhw__set_pixel(p->data, p->stride, xpos+len+1, ypos+2*len+1, stbhw__corner_colors[3][f]);
+}
+
+// generates a template image, assuming data is 3*w*h bytes long, RGB format
+STBHW_EXTERN int stbhw_make_template(stbhw_config *c, unsigned char *data, int w, int h, int stride_in_bytes)
+{
+   stbhw__process p;
+   int i;
+
+   p.data = data;
+   p.w = w;
+   p.h = h;
+   p.stride = stride_in_bytes;
+   p.ts = 0;
+   p.c = c;
+
+   if (c->is_corner) {
+      p.process_h_rect = stbhw__corner_process_h_rect;
+      p.process_v_rect = stbhw__corner_process_v_rect;
+   } else {
+      p.process_h_rect = stbhw__edge_process_h_rect;
+      p.process_v_rect = stbhw__edge_process_v_rect;
+   }
+
+   for (i=0; i < p.h; ++i)
+      memset(p.data + i*p.stride, 255, 3*p.w);
+
+   if (!stbhw__process_template(&p))
+      return 0;
+
+   if (c->is_corner) {
+      // write out binary information in first line of image
+      for (i=0; i < 4; ++i)
+         data[w*3-1-i] = c->num_color[i];
+      data[w*3-1-i] = c->num_vary_x;
+      data[w*3-2-i] = c->num_vary_y;
+      data[w*3-3-i] = c->short_side_len;
+      data[w*3-4-i] = 0xc0;
+   } else {
+      for (i=0; i < 6; ++i)
+         data[w*3-1-i] = c->num_color[i];
+      data[w*3-1-i] = c->num_vary_x;
+      data[w*3-2-i] = c->num_vary_y;
+      data[w*3-3-i] = c->short_side_len;
+   }
+
+   // make it more obvious it encodes actual data
+   for (i=0; i < 9; ++i)
+      p.data[p.w*3 - 1 - i] ^= i*55;
+
+   return 1;
+}
+#endif // STB_HBWANG_IMPLEMENTATION
--- /dev/null
+++ b/include-demo/stb_hexwave.h
@@ -1,0 +1,680 @@
+// stb_hexwave - v0.5 - public domain, initial release 2021-04-01
+//
+// A flexible anti-aliased (bandlimited) digital audio oscillator.
+//
+// This library generates waveforms of a variety of shapes made of
+// line segments. It does not do envelopes, LFO effects, etc.; it
+// merely tries to solve the problem of generating an artifact-free
+// morphable digital waveform with a variety of spectra, and leaves
+// it to the user to rescale the waveform and mix multiple voices, etc.
+//
+// Compiling:
+//
+//   In one C/C++ file that #includes this file, do
+//
+//      #define STB_HEXWAVE_IMPLEMENTATION
+//      #include "stb_hexwave.h"
+//
+//   Optionally, #define STB_HEXWAVE_STATIC before including
+//   the header to cause the definitions to be private to the
+//   implementation file (i.e. to be "static" instead of "extern").
+//
+// Notes:
+//
+//   Optionally performs memory allocation during initialization,
+//   never allocates otherwise.
+//
+// License:
+//
+//   See end of file for license information.
+//
+// Usage:
+//
+//   Initialization:
+//
+//     hexwave_init(32,16,NULL); // read "header section" for alternatives
+//
+//   Create oscillator:
+//
+//     HexWave *osc = malloc(sizeof(*osc)); // or "new HexWave", or declare globally or on stack
+//     hexwave_create(osc, reflect_flag, peak_time, half_height, zero_wait);
+//       see "Waveform shapes" below for the meaning of these parameters
+//
+//   Generate audio:
+//
+//     hexwave_generate_samples(output, number_of_samples, osc, oscillator_freq)
+//       where:
+//         output is a buffer where the library will store floating point audio samples
+//         number_of_samples is the number of audio samples to generate
+//         osc is a pointer to a Hexwave
+//         oscillator_freq is the frequency of the oscillator divided by the sample rate
+//
+//       The output samples will continue from where the samples generated by the
+//       previous hexwave_generate_samples() on this oscillator ended.
+//
+//   Change oscillator waveform:
+//
+//     hexwave_change(osc, reflect_flag, peak_time, half_height, zero_wait);
+//       can call in between calls to hexwave_generate_samples
+//
+// Waveform shapes:
+//
+//   All waveforms generated by hexwave are constructed from six line segments
+//   characterized by 3 parameters.
+//
+//   See demonstration: https://www.youtube.com/watch?v=hsUCrAsDN-M
+//
+//                 reflect=0                          reflect=1
+//
+//           0-----P---1                        0-----P---1    peak_time = P
+//                 .     1                            .     1
+//                /\_    :                           /\_    :
+//               /   \_  :                          /   \_  :
+//              /      \.H                         /      \.H  half_height = H
+//             /       | :                        /       | :
+//       _____/        |_:___               _____/        | :       _____
+//           .           :   \        |         .         | :      /
+//           .           :    \       |         .         | :     /
+//           .           :     \     _/         .         \_:    /
+//           .           :      \  _/           .           :_  /
+//           .          -1       \/             .          -1 \/
+//       0 - Z - - - - 1                    0 - Z - - - - 1   zero_wait = Z
+//
+//    Classic waveforms:
+//                               peak    half    zero
+//                     reflect   time   height   wait
+//      Sawtooth          1       0       0       0
+//      Square            1       0       1       0
+//      Triangle          1       0.5     0       0
+//
+//    Some waveforms can be produced in multiple ways, which is useful when morphing
+//    into other waveforms, and there are a few more notable shapes:
+//
+//                               peak    half    zero
+//                     reflect   time   height   wait
+//      Sawtooth          1       1      any      0
+//      Sawtooth (8va)    1       0      -1       0
+//      Triangle          1       0.5     0       0
+//      Square            1       0       1       0
+//      Square            0       0       1       0
+//      Triangle          0       0.5     0       0
+//      Triangle          0       0      -1       0
+//      AlternatingSaw    0       0       0       0
+//      AlternatingSaw    0       1      any      0
+//      Stairs            0       0       1       0.5
+//
+//    The "Sawtooth (8va)" waveform is identical to a sawtooth wave with 2x the
+//    frequency, but when morphed with other values, it becomes an overtone of
+//    the base frequency.
+//
+//  Morphing waveforms:
+//
+//    Sweeping peak_time morphs the waveform while producing various spectra.
+//    Sweeping half_height effectively crossfades between two waveforms; useful, but less exciting.
+//    Sweeping zero_wait produces a similar effect no matter the reset of the waveform,
+//        a sort of high-pass/PWM effect where the wave becomes silent at zero_wait=1.
+//
+//    You can trivially morph between any two waveforms from the above table
+//    which only differ in one column.
+//
+//    Crossfade between classic waveforms:
+//                                            peak     half    zero
+//        Start         End         reflect   time    height   wait
+//        -----         ---         -------   ----    ------   ----
+//        Triangle      Square         0       0      -1..1    0
+//        Saw           Square         1       0       0..1    0
+//        Triangle      Saw            1       0.5     0..2    0
+//
+//    The last morph uses uses half-height values larger than 1, which means it will
+//    be louder and the output should be scaled down by half to compensate, or better
+//    by dynamically tracking the morph: volume_scale = 1 - half_height/4
+//
+//    Non-crossfade morph between classic waveforms, most require changing
+//    two parameters at the same time:
+//                                           peak     half    zero
+//      Start         End         reflect    time    height   wait
+//      -----         ---         -------    ----    ------   ----
+//      Square        Triangle      any      0..0.5   1..0     0
+//      Square        Saw            1       0..1     1..any   0
+//      Triangle      Saw            1     0.5..1     0..-1    0
+//
+//    Other noteworthy morphs between simple shapes:
+//                                                            peak     half    zero
+//      Start           Halfway       End          reflect    time    height   wait
+//      -----           ---------     ---          -------    ----    ------   ----
+//      Saw (8va,neg)                Saw (pos)        1       0..1      -1      0
+//      Saw (neg)                    Saw (pos)        1       0..1       0      0
+//      Triangle                     AlternatingSaw   0       0..1      -1      0
+//      AlternatingSaw  Triangle     AlternatingSaw   0       0..1       0      0
+//      Square                       AlternatingSaw   0       0..1       1      0
+//      Triangle        Triangle     AlternatingSaw   0       0..1    -1..1     0
+//      Square                       AlternatingSaw   0       0..1     1..0     0
+//      Saw (8va)       Triangle     Saw              1       0..1    -1..1     0
+//      Saw (neg)                    Saw (pos)        1       0..1     0..1     0
+//      AlternatingSaw               AlternatingSaw   0       0..1     0..any   0
+//
+//   The last entry is noteworthy because the morph from the halfway point to either
+//   endpoint sounds very different. For example, an LFO sweeping back and forth over
+//   the whole range will morph between the middle timbre and the AlternatingSaw
+//   timbre in two different ways, alternating.
+//
+//   Entries with "any" for half_height are whole families of morphs, as you can pick
+//   any value you want as the endpoint for half_height.
+//
+//   You can always morph between any two waveforms with the same value of 'reflect'
+//   by just sweeping the parameters simultaneously. There will never be artifacts
+//   and the result will always be useful, if not necessarily what you want.
+//
+//   You can vary the sound of two-parameter morphs by ramping them differently,
+//   e.g. if the morph goes from t=0..1, then square-to-triangle looks like:
+//        peak_time   = lerp(t, 0, 0.5)
+//        half_height = lerp(t, 1, 0  )
+//   but you can also do things like:
+//        peak_time   = lerp(smoothstep(t), 0, 0.5)
+//        half_height = cos(PI/2 * t)
+//
+// How it works:
+//
+//   hexwave use BLEP to bandlimit discontinuities and BLAMP
+//   to bandlimit C1 discontinuities. This is not polyBLEP
+//   (polynomial BLEP), it is table-driven BLEP. It is
+//   also not minBLEP (minimum-phase BLEP), as that complicates
+//   things for little benefit once BLAMP is involved.
+//
+//   The previous oscillator frequency is remembered, and when
+//   the frequency changes, a BLAMP is generated to remove the
+//   C1 discontinuity, which reduces artifacts for sweeps/LFO.
+//
+//   Changes to an oscillator timbre using hexwave_change() actually
+//   wait until the oscillator finishes its current cycle. All
+//   waveforms with non-zero "zero_wait" settings pass through 0
+//   and have 0-slope at the start of a cycle, which means changing
+//   the settings is artifact free at that time. (If zero_wait is 0,
+//   the code still treats it as passing through 0 with 0-slope; it'll
+//   apply the necessary fixups to make it artifact free as if it does
+//   transition to 0 with 0-slope vs. the waveform at the end of
+//   the cycle, then adds the fixups for a non-0 and non-0 slope
+//   at the start of the cycle, which cancels out if zero_wait is 0,
+//   and still does the right thing if zero_wait is 0 when the
+//   settings are updated.)
+//
+//   BLEP/BLAMP normally requires overlapping buffers, but this
+//   is hidden from the user by generating the waveform to a
+//   temporary buffer and saving the overlap regions internally
+//   between calls. (It is slightly more complicated; see code.)
+//
+//   By design all shapes have 0 DC offset; this is one reason
+//   hexwave uses zero_wait instead of standard PWM.
+//
+//   The internals of hexwave could support any arbitrary shape
+//   made of line segments, but I chose not to expose this
+//   generality in favor of a simple, easy-to-use API.
+
+#ifndef STB_INCLUDE_STB_HEXWAVE_H
+#define STB_INCLUDE_STB_HEXWAVE_H
+
+#ifndef STB_HEXWAVE_MAX_BLEP_LENGTH
+#define STB_HEXWAVE_MAX_BLEP_LENGTH   64 // good enough for anybody
+#endif
+
+#ifdef STB_HEXWAVE_STATIC
+#define STB_HEXWAVE_DEF static
+#else
+#define STB_HEXWAVE_DEF extern
+#endif
+
+typedef struct HexWave HexWave;
+
+STB_HEXWAVE_DEF void hexwave_init(int width, int oversample, float *user_buffer);
+//         width: size of BLEP, from 4..64, larger is slower & more memory but less aliasing
+//    oversample: 2+, number of subsample positions, larger uses more memory but less noise
+//   user_buffer: optional, if provided the library will perform no allocations.
+//                16*width*(oversample+1) bytes, must stay allocated as long as library is used
+//                technically it only needs:   8*( width * (oversample  + 1))
+//                                           + 8*((width *  oversample) + 1)  bytes
+//
+// width can be larger than 64 if you define STB_HEXWAVE_MAX_BLEP_LENGTH to a larger value
+
+STB_HEXWAVE_DEF void hexwave_shutdown(float *user_buffer);
+//       user_buffer: pass in same parameter as passed to hexwave_init
+
+STB_HEXWAVE_DEF void hexwave_create(HexWave *hex, int reflect, float peak_time, float half_height, float zero_wait);
+// see docs above for description
+//
+//   reflect is tested as 0 or non-zero
+//   peak_time is clamped to 0..1
+//   half_height is not clamped
+//   zero_wait is clamped to 0..1
+
+STB_HEXWAVE_DEF void hexwave_change(HexWave *hex, int reflect, float peak_time, float half_height, float zero_wait);
+// see docs
+
+STB_HEXWAVE_DEF void hexwave_generate_samples(float *output, int num_samples, HexWave *hex, float freq);
+//            output: buffer where the library will store generated floating point audio samples
+// number_of_samples: the number of audio samples to generate
+//               osc: pointer to a Hexwave initialized with 'hexwave_create'
+//   oscillator_freq: frequency of the oscillator divided by the sample rate
+
+// private:
+typedef struct
+{
+   int   reflect;
+   float peak_time;
+   float zero_wait;
+   float half_height;
+} HexWaveParameters;
+
+struct HexWave
+{
+   float t, prev_dt;
+   HexWaveParameters current, pending;
+   int have_pending;
+   float buffer[STB_HEXWAVE_MAX_BLEP_LENGTH];
+}; 
+#endif
+
+#ifdef STB_HEXWAVE_IMPLEMENTATION
+
+#ifndef STB_HEXWAVE_NO_ALLOCATION
+#include <stdlib.h> // malloc,free
+#endif
+
+#include <string.h> // memset,memcpy,memmove
+#include <math.h>   // sin,cos,fabs
+
+#define hexwave_clamp(v,a,b)   ((v) < (a) ? (a) : (v) > (b) ? (b) : (v))
+
+STB_HEXWAVE_DEF void hexwave_change(HexWave *hex, int reflect, float peak_time, float half_height, float zero_wait)
+{
+   hex->pending.reflect     = reflect;
+   hex->pending.peak_time   = hexwave_clamp(peak_time,0,1);
+   hex->pending.half_height = half_height;
+   hex->pending.zero_wait   = hexwave_clamp(zero_wait,0,1);
+   // put a barrier here to allow changing from a different thread than the generator
+   hex->have_pending        = 1;
+}
+
+STB_HEXWAVE_DEF void hexwave_create(HexWave *hex, int reflect, float peak_time, float half_height, float zero_wait)
+{
+   memset(hex, 0, sizeof(*hex));
+   hexwave_change(hex, reflect, peak_time, half_height, zero_wait);
+   hex->current = hex->pending;
+   hex->have_pending = 0;
+   hex->t = 0;
+   hex->prev_dt = 0;
+}
+
+static struct
+{
+   int width;       // width of fixup in samples
+   int oversample;  // number of oversampled versions (there's actually one more to allow lerpign)
+   float *blep;
+   float *blamp;
+} hexblep;
+
+static void hex_add_oversampled_bleplike(float *output, float time_since_transition, float scale, float *data)
+{
+   float *d1,*d2;
+   float lerpweight;
+   int i, bw = hexblep.width;
+
+   int slot = (int) (time_since_transition * hexblep.oversample);
+   if (slot >= hexblep.oversample)
+      slot = hexblep.oversample-1; // clamp in case the floats overshoot
+
+   d1 = &data[ slot   *bw];
+   d2 = &data[(slot+1)*bw];
+
+   lerpweight = time_since_transition * hexblep.oversample - slot;
+   for (i=0; i < bw; ++i)
+      output[i] += scale * (d1[i] + (d2[i]-d1[i])*lerpweight);
+}
+
+static void hex_blep (float *output, float time_since_transition, float scale)
+{
+   hex_add_oversampled_bleplike(output, time_since_transition, scale, hexblep.blep);
+}
+
+static void hex_blamp(float *output, float time_since_transition, float scale)
+{
+   hex_add_oversampled_bleplike(output, time_since_transition, scale, hexblep.blamp);
+}
+
+typedef struct
+{
+   float t,v,s; // time, value, slope
+} hexvert;
+
+// each half of the waveform needs 4 vertices to represent 3 line
+// segments, plus 1 more for wraparound
+static void hexwave_generate_linesegs(hexvert vert[9], HexWave *hex, float dt)
+{
+   int j;
+   float min_len = dt / 256.0f;
+
+   vert[0].t = 0;
+   vert[0].v = 0;
+   vert[1].t = hex->current.zero_wait*0.5f;
+   vert[1].v = 0;
+   vert[2].t = 0.5f*hex->current.peak_time + vert[1].t*(1-hex->current.peak_time);
+   vert[2].v = 1;
+   vert[3].t = 0.5f;
+   vert[3].v = hex->current.half_height;
+
+   if (hex->current.reflect) {
+      for (j=4; j <= 7; ++j) {
+         vert[j].t = 1 -  vert[7-j].t;
+         vert[j].v =    - vert[7-j].v;
+      }
+   } else {
+      for (j=4; j <= 7; ++j) {
+         vert[j].t =  0.5f +  vert[j-4].t;
+         vert[j].v =        - vert[j-4].v;
+      }
+   }
+   vert[8].t = 1;
+   vert[8].v = 0;
+
+   for (j=0; j < 8; ++j) {
+      if (vert[j+1].t <= vert[j].t + min_len) {
+          // if change takes place over less than a fraction of a sample treat as discontinuity
+          //
+          // otherwise the slope computation can blow up to arbitrarily large and we
+          // try to generate a huge BLAMP and the result is wrong.
+          // 
+          // why does this happen if the math is right? i believe if done perfectly,
+          // the two BLAMPs on either side of the slope would cancel out, but our
+          // BLAMPs have only limited sub-sample precision and limited integration
+          // accuracy. or maybe it's just the math blowing up w/ floating point precision
+          // limits as we try to make x * (1/x) cancel out
+          //
+          // min_len verified artifact-free even near nyquist with only oversample=4
+         vert[j+1].t = vert[j].t;
+      }
+   }
+
+   if (vert[8].t != 1.0f) {
+      // if the above fixup moved the endpoint away from 1.0, move it back,
+      // along with any other vertices that got moved to the same time
+      float t = vert[8].t;
+      for (j=5; j <= 8; ++j)
+         if (vert[j].t == t)
+            vert[j].t = 1.0f;
+   }
+
+   // compute the exact slopes from the final fixed-up positions
+   for (j=0; j < 8; ++j)
+      if (vert[j+1].t == vert[j].t)
+         vert[j].s = 0;
+      else
+         vert[j].s = (vert[j+1].v - vert[j].v) / (vert[j+1].t - vert[j].t);
+
+   // wraparound at end
+   vert[8].t = 1;
+   vert[8].v = vert[0].v;
+   vert[8].s = vert[0].s;
+}
+
+STB_HEXWAVE_DEF void hexwave_generate_samples(float *output, int num_samples, HexWave *hex, float freq)
+{
+   hexvert vert[9];
+   int pass,i,j;
+   float t = hex->t;
+   float temp_output[2*STB_HEXWAVE_MAX_BLEP_LENGTH];
+   int buffered_length = sizeof(float)*hexblep.width;
+   float dt = (float) fabs(freq);
+   float recip_dt = (dt == 0.0f) ? 0.0f : 1.0f / dt;
+
+   int halfw = hexblep.width/2;
+   // all sample times are biased by halfw to leave room for BLEP/BLAMP to go back in time
+
+   if (num_samples <= 0)
+      return;
+
+   // convert parameters to times and slopes
+   hexwave_generate_linesegs(vert, hex, dt);
+
+   if (hex->prev_dt != dt) {
+      // if frequency changes, add a fixup at the derivative discontinuity starting at now
+      float slope;
+      for (j=1; j < 6; ++j)
+         if (t < vert[j].t)
+            break;
+      slope = vert[j].s;
+      if (slope != 0)
+         hex_blamp(output, 0, (dt - hex->prev_dt)*slope);
+      hex->prev_dt = dt;
+   }
+
+   // copy the buffered data from last call and clear the rest of the output array
+   memset(output, 0, sizeof(float)*num_samples);
+   memset(temp_output, 0, 2*hexblep.width*sizeof(float));
+
+   if (num_samples >= hexblep.width) {
+      memcpy(output, hex->buffer, buffered_length);
+   } else {
+      // if the output is shorter than hexblep.width, we do all synthesis to temp_output
+      memcpy(temp_output, hex->buffer, buffered_length);
+   }
+
+   for (pass=0; pass < 2; ++pass) {
+      int i0,i1;
+      float *out;
+
+      // we want to simulate having one buffer that is num_output + hexblep.width
+      // samples long, without putting that requirement on the user, and without
+      // allocating a temp buffer that's as long as the whole thing. so we use two
+      // overlapping buffers, one the user's buffer and one a fixed-length temp
+      // buffer.
+
+      if (pass == 0) {
+         if (num_samples < hexblep.width)
+            continue;
+         // run as far as we can without overwriting the end of the user's buffer 
+         out = output;
+         i0 = 0;
+         i1 = num_samples - hexblep.width;
+      } else {
+         // generate the rest into a temp buffer
+         out = temp_output;
+         i0 = 0;
+         if (num_samples >= hexblep.width)
+            i1 = hexblep.width;
+         else
+            i1 = num_samples;
+      }
+
+      // determine current segment
+      for (j=0; j < 8; ++j)
+         if (t < vert[j+1].t)                                  
+            break;
+
+      i = i0;
+      for(;;) {
+         while (t < vert[j+1].t) {
+            if (i == i1)
+               goto done;
+            out[i+halfw] += vert[j].v + vert[j].s*(t - vert[j].t);
+            t += dt;
+            ++i;
+         }
+         // transition from lineseg starting at j to lineseg starting at j+1
+
+         if (vert[j].t == vert[j+1].t)
+            hex_blep(out+i, recip_dt*(t-vert[j+1].t), (vert[j+1].v - vert[j].v));
+         hex_blamp(out+i, recip_dt*(t-vert[j+1].t), dt*(vert[j+1].s - vert[j].s));
+         ++j;
+
+         if (j == 8) {
+            // change to different waveform if there's a change pending
+            j = 0;
+            t -= 1.0; // t was >= 1.f if j==8
+            if (hex->have_pending) {
+               float prev_s0 = vert[j].s;
+               float prev_v0 = vert[j].v;
+               hex->current = hex->pending;
+               hex->have_pending = 0;
+               hexwave_generate_linesegs(vert, hex, dt);
+               // the following never occurs with this oscillator, but it makes
+               // the code work in more general cases
+               if (vert[j].v != prev_v0)
+                  hex_blep (out+i, recip_dt*t,    (vert[j].v - prev_v0));
+               if (vert[j].s != prev_s0)
+                  hex_blamp(out+i, recip_dt*t, dt*(vert[j].s - prev_s0));
+            }
+         }
+      }
+     done:
+      ;
+   }
+
+   // at this point, we've written output[] and temp_output[]
+   if (num_samples >= hexblep.width) {
+      // the first half of temp[] overlaps the end of output, the second half will be the new start overlap
+      for (i=0; i < hexblep.width; ++i)
+         output[num_samples-hexblep.width + i] += temp_output[i];
+      memcpy(hex->buffer, temp_output+hexblep.width, buffered_length);
+   } else {
+      for (i=0; i < num_samples; ++i)
+         output[i] = temp_output[i];
+      memcpy(hex->buffer, temp_output+num_samples, buffered_length);
+   }
+
+   hex->t = t;
+}
+
+STB_HEXWAVE_DEF void hexwave_shutdown(float *user_buffer)
+{
+   #ifndef STB_HEXWAVE_NO_ALLOCATION
+   if (user_buffer != 0) {
+      free(hexblep.blep);
+      free(hexblep.blamp);
+   }
+   #endif
+}
+
+// buffer should be NULL or must be 4*(width*(oversample+1)*2 + 
+STB_HEXWAVE_DEF void hexwave_init(int width, int oversample, float *user_buffer)
+{
+   int halfwidth = width/2;
+   int half = halfwidth*oversample;
+   int blep_buffer_count = width*(oversample+1);
+   int n = 2*half+1;
+#ifdef STB_HEXWAVE_NO_ALLOCATION
+   float *buffers = user_buffer;
+#else
+   float *buffers = user_buffer ? user_buffer : (float *) malloc(sizeof(float) * n * 2);
+#endif
+   float *step    = buffers+0*n;
+   float *ramp    = buffers+1*n;
+   float *blep_buffer, *blamp_buffer;
+   double integrate_impulse=0, integrate_step=0;
+   int i,j;
+
+   if (width > STB_HEXWAVE_MAX_BLEP_LENGTH)
+      width = STB_HEXWAVE_MAX_BLEP_LENGTH;
+
+   if (user_buffer == 0) {
+      #ifndef STB_HEXWAVE_NO_ALLOCATION
+      blep_buffer  = (float *) malloc(sizeof(float)*blep_buffer_count);
+      blamp_buffer = (float *) malloc(sizeof(float)*blep_buffer_count);
+      #endif
+   } else {
+      blep_buffer  = ramp+n;
+      blamp_buffer = blep_buffer + blep_buffer_count;
+   }
+
+   // compute BLEP and BLAMP by integerating windowed sinc
+   for (i=0; i < n; ++i) {
+      for (j=0; j < 16; ++j) {
+         float sinc_t = 3.141592f* (i-half) / oversample;
+         float sinc   = (i==half) ? 1.0f : (float) sin(sinc_t) / (sinc_t);
+         float wt     = 2.0f*3.1415926f * i / (n-1);
+         float window = (float) (0.355768 - 0.487396*cos(wt) + 0.144232*cos(2*wt) - 0.012604*cos(3*wt)); // Nuttall
+         double value       =         window * sinc;
+         integrate_impulse +=         value/16;
+         integrate_step    +=         integrate_impulse/16;
+      }
+      step[i]            = (float) integrate_impulse;
+      ramp[i]            = (float) integrate_step;
+   }
+
+   // renormalize
+   for (i=0; i < n; ++i) {
+      step[i] = step[i] * (float) (1.0       / step[n-1]); // step needs to reach to 1.0
+      ramp[i] = ramp[i] * (float) (halfwidth / ramp[n-1]); // ramp needs to become a slope of 1.0 after oversampling
+   }
+
+   // deinterleave to allow efficient interpolation e.g. w/SIMD
+   for (j=0; j <= oversample; ++j) {
+      for (i=0; i < width; ++i) {
+         blep_buffer [j*width+i] = step[j+i*oversample];
+         blamp_buffer[j*width+i] = ramp[j+i*oversample];
+      }
+   }
+
+   // subtract out the naive waveform; note we can't do this to the raw data
+   // above, because we want the discontinuity to be in a different locations
+   // for j=0 and j=oversample (which exists to provide something to interpolate against)
+   for (j=0; j <= oversample; ++j) {
+      // subtract step
+      for (i=halfwidth; i < width; ++i)
+         blep_buffer [j*width+i] -= 1.0f;
+      // subtract ramp
+      for (i=halfwidth; i < width; ++i)
+         blamp_buffer[j*width+i] -= (j+i*oversample-half)*(1.0f/oversample);
+   }
+
+   hexblep.blep  = blep_buffer;
+   hexblep.blamp = blamp_buffer;
+   hexblep.width = width;
+   hexblep.oversample = oversample;
+
+   #ifndef STB_HEXWAVE_NO_ALLOCATION
+   if (user_buffer == 0)
+      free(buffers);
+   #endif
+}
+#endif // STB_HEXWAVE_IMPLEMENTATION
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
--- a/include-demo/stb_image.h
+++ b/include-demo/stb_image.h
@@ -1032,7 +1032,7 @@
 }
 
 // returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
-#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
 static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
 {
    return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
@@ -1055,7 +1055,7 @@
    return stbi__malloc(a*b*c + add);
 }
 
-#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
 static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
 {
    if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL;
@@ -3265,6 +3265,13 @@
    for (i=0; i < s->img_n; ++i) {
       if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
       if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
+   }
+
+   // check that plane subsampling factors are integer ratios; our resamplers can't deal with fractional ratios
+   // and I've never seen a non-corrupted JPEG file actually use them
+   for (i=0; i < s->img_n; ++i) {
+      if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG");
+      if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG");
    }
 
    // compute interleaved mcu info
--- /dev/null
+++ b/include-demo/stb_include.h
@@ -1,0 +1,295 @@
+// stb_include.h - v0.02 - parse and process #include directives - public domain
+//
+// To build this, in one source file that includes this file do
+//      #define STB_INCLUDE_IMPLEMENTATION
+//
+// This program parses a string and replaces lines of the form
+//         #include "foo"
+// with the contents of a file named "foo". It also embeds the
+// appropriate #line directives. Note that all include files must
+// reside in the location specified in the path passed to the API;
+// it does not check multiple directories.
+//
+// If the string contains a line of the form
+//         #inject
+// then it will be replaced with the contents of the string 'inject' passed to the API.
+//
+// Options:
+//
+//      Define STB_INCLUDE_LINE_GLSL to get GLSL-style #line directives
+//      which use numbers instead of filenames.
+//
+//      Define STB_INCLUDE_LINE_NONE to disable output of #line directives.
+//
+// Standard libraries:
+//
+//      stdio.h     FILE, fopen, fclose, fseek, ftell
+//      stdlib.h    malloc, realloc, free
+//      string.h    strcpy, strncmp, memcpy
+//
+// Credits:
+//
+// Written by Sean Barrett.
+//
+// Fixes:
+//  Michal Klos
+
+#ifndef STB_INCLUDE_STB_INCLUDE_H
+#define STB_INCLUDE_STB_INCLUDE_H
+
+// Do include-processing on the string 'str'. To free the return value, pass it to free()
+char *stb_include_string(char *str, char *inject, char *path_to_includes, char *filename_for_line_directive, char error[256]);
+
+// Concatenate the strings 'strs' and do include-processing on the result. To free the return value, pass it to free()
+char *stb_include_strings(char **strs, int count, char *inject, char *path_to_includes, char *filename_for_line_directive, char error[256]);
+
+// Load the file 'filename' and do include-processing on the string therein. note that
+// 'filename' is opened directly; 'path_to_includes' is not used. To free the return value, pass it to free()
+char *stb_include_file(char *filename, char *inject, char *path_to_includes, char error[256]);
+
+#endif
+
+
+#ifdef STB_INCLUDE_IMPLEMENTATION
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+static char *stb_include_load_file(char *filename, size_t *plen)
+{
+   char *text;
+   size_t len;
+   FILE *f = fopen(filename, "rb");
+   if (f == 0) return 0;
+   fseek(f, 0, SEEK_END);
+   len = (size_t) ftell(f);
+   if (plen) *plen = len;
+   text = (char *) malloc(len+1);
+   if (text == 0) return 0;
+   fseek(f, 0, SEEK_SET);
+   fread(text, 1, len, f);
+   fclose(f);
+   text[len] = 0;
+   return text;
+}
+
+typedef struct
+{
+   int offset;
+   int end;
+   char *filename;
+   int next_line_after;
+} include_info;
+
+static include_info *stb_include_append_include(include_info *array, int len, int offset, int end, char *filename, int next_line)
+{
+   include_info *z = (include_info *) realloc(array, sizeof(*z) * (len+1));
+   z[len].offset   = offset;
+   z[len].end      = end;
+   z[len].filename = filename;
+   z[len].next_line_after = next_line;
+   return z;
+}
+
+static void stb_include_free_includes(include_info *array, int len)
+{
+   int i;
+   for (i=0; i < len; ++i)
+      free(array[i].filename);
+   free(array);
+}
+
+static int stb_include_isspace(int ch)
+{
+   return (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n');
+}
+
+// find location of all #include and #inject
+static int stb_include_find_includes(char *text, include_info **plist)
+{
+   int line_count = 1;
+   int inc_count = 0;
+   char *s = text, *start;
+   include_info *list = NULL;
+   while (*s) {
+      // parse is always at start of line when we reach here
+      start = s;
+      while (*s == ' ' || *s == '\t')
+         ++s;
+      if (*s == '#') {
+         ++s;
+         while (*s == ' ' || *s == '\t')
+            ++s;
+         if (0==strncmp(s, "include", 7) && stb_include_isspace(s[7])) {
+            s += 7;
+            while (*s == ' ' || *s == '\t')
+               ++s;
+            if (*s == '"') {
+               char *t = ++s;
+               while (*t != '"' && *t != '\n' && *t != '\r' && *t != 0)
+                  ++t;
+               if (*t == '"') {
+                  char *filename = (char *) malloc(t-s+1);
+                  memcpy(filename, s, t-s);
+                  filename[t-s] = 0;
+                  s=t;
+                  while (*s != '\r' && *s != '\n' && *s != 0)
+                     ++s;
+                  // s points to the newline, so s-start is everything except the newline
+                  list = stb_include_append_include(list, inc_count++, start-text, s-text, filename, line_count+1);
+               }
+            }
+         } else if (0==strncmp(s, "inject", 6) && (stb_include_isspace(s[6]) || s[6]==0)) {
+            while (*s != '\r' && *s != '\n' && *s != 0)
+               ++s;
+            list = stb_include_append_include(list, inc_count++, start-text, s-text, NULL, line_count+1);
+         }
+      }
+      while (*s != '\r' && *s != '\n' && *s != 0)
+         ++s;
+      if (*s == '\r' || *s == '\n') {
+         s = s + (s[0] + s[1] == '\r' + '\n' ? 2 : 1);
+      }
+      ++line_count;
+   }
+   *plist = list;
+   return inc_count;
+}
+
+// avoid dependency on sprintf()
+static void stb_include_itoa(char str[9], int n)
+{
+   int i;
+   for (i=0; i < 8; ++i)
+      str[i] = ' ';
+   str[i] = 0;
+
+   for (i=1; i < 8; ++i) {
+      str[7-i] = '0' + (n % 10);
+      n /= 10;
+      if (n == 0)
+         break;
+   }
+}
+
+static char *stb_include_append(char *str, size_t *curlen, char *addstr, size_t addlen)
+{
+   str = (char *) realloc(str, *curlen + addlen);
+   memcpy(str + *curlen, addstr, addlen);
+   *curlen += addlen;
+   return str;
+}
+
+char *stb_include_string(char *str, char *inject, char *path_to_includes, char *filename, char error[256])
+{
+   char temp[4096];
+   include_info *inc_list;
+   int i, num = stb_include_find_includes(str, &inc_list);
+   size_t source_len = strlen(str);
+   char *text=0;
+   size_t textlen=0, last=0;
+   for (i=0; i < num; ++i) {
+      text = stb_include_append(text, &textlen, str+last, inc_list[i].offset - last);
+      // write out line directive for the include
+      #ifndef STB_INCLUDE_LINE_NONE
+      #ifdef STB_INCLUDE_LINE_GLSL
+      if (textlen != 0)  // GLSL #version must appear first, so don't put a #line at the top
+      #endif
+      {
+         strcpy(temp, "#line ");
+         stb_include_itoa(temp+6, 1);
+         strcat(temp, " ");
+         #ifdef STB_INCLUDE_LINE_GLSL
+         stb_include_itoa(temp+15, i+1);
+         #else
+         strcat(temp, "\"");
+         if (inc_list[i].filename == 0)
+            strcmp(temp, "INJECT");
+         else
+            strcat(temp, inc_list[i].filename);
+         strcat(temp, "\"");
+         #endif
+         strcat(temp, "\n");
+         text = stb_include_append(text, &textlen, temp, strlen(temp));
+      }
+      #endif
+      if (inc_list[i].filename == 0) {
+         if (inject != 0)
+            text = stb_include_append(text, &textlen, inject, strlen(inject));
+      } else {
+         char *inc;
+         strcpy(temp, path_to_includes);
+         strcat(temp, "/");
+         strcat(temp, inc_list[i].filename);
+         inc = stb_include_file(temp, inject, path_to_includes, error);
+         if (inc == NULL) {
+            stb_include_free_includes(inc_list, num);
+            return NULL;
+         }
+         text = stb_include_append(text, &textlen, inc, strlen(inc));
+         free(inc);
+      }
+      // write out line directive
+      #ifndef STB_INCLUDE_LINE_NONE
+      strcpy(temp, "\n#line ");
+      stb_include_itoa(temp+6, inc_list[i].next_line_after);
+      strcat(temp, " ");
+      #ifdef STB_INCLUDE_LINE_GLSL
+      stb_include_itoa(temp+15, 0);
+      #else
+      strcat(temp, filename != 0 ? filename : "source-file");
+      #endif
+      text = stb_include_append(text, &textlen, temp, strlen(temp));
+      // no newlines, because we kept the #include newlines, which will get appended next
+      #endif
+      last = inc_list[i].end;
+   }
+   text = stb_include_append(text, &textlen, str+last, source_len - last + 1); // append '\0'
+   stb_include_free_includes(inc_list, num);
+   return text;
+}
+
+char *stb_include_strings(char **strs, int count, char *inject, char *path_to_includes, char *filename, char error[256])
+{
+   char *text;
+   char *result;
+   int i;
+   size_t length=0;
+   for (i=0; i < count; ++i)
+      length += strlen(strs[i]);
+   text = (char *) malloc(length+1);
+   length = 0;
+   for (i=0; i < count; ++i) {
+      strcpy(text + length, strs[i]);
+      length += strlen(strs[i]);
+   }
+   result = stb_include_string(text, inject, path_to_includes, filename, error);
+   free(text);
+   return result;
+}
+
+char *stb_include_file(char *filename, char *inject, char *path_to_includes, char error[256])
+{
+   size_t len;
+   char *result;
+   char *text = stb_include_load_file(filename, &len);
+   if (text == NULL) {
+      strcpy(error, "Error: couldn't load '");
+      strcat(error, filename);
+      strcat(error, "'");
+      return 0;
+   }
+   result = stb_include_string(text, inject, path_to_includes, filename, error);
+   free(text);
+   return result;
+}
+
+#if 0 // @TODO, GL_ARB_shader_language_include-style system that doesn't touch filesystem
+char *stb_include_preloaded(char *str, char *inject, char *includes[][2], char error[256])
+{
+
+}
+#endif
+
+#endif // STB_INCLUDE_IMPLEMENTATION
--- /dev/null
+++ b/include-demo/stb_leakcheck.h
@@ -1,0 +1,194 @@
+// stb_leakcheck.h - v0.6 - quick & dirty malloc leak-checking - public domain
+// LICENSE
+//
+//   See end of file.
+
+#ifdef STB_LEAKCHECK_IMPLEMENTATION
+#undef STB_LEAKCHECK_IMPLEMENTATION // don't implement more than once
+
+// if we've already included leakcheck before, undefine the macros
+#ifdef malloc
+#undef malloc
+#undef free
+#undef realloc
+#endif
+
+#ifndef STB_LEAKCHECK_OUTPUT_PIPE
+#define STB_LEAKCHECK_OUTPUT_PIPE stdout
+#endif
+
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stddef.h>
+typedef struct malloc_info stb_leakcheck_malloc_info;
+
+struct malloc_info
+{
+   const char *file;
+   int line;
+   size_t size;
+   stb_leakcheck_malloc_info *next,*prev;
+};
+
+static stb_leakcheck_malloc_info *mi_head;
+
+void *stb_leakcheck_malloc(size_t sz, const char *file, int line)
+{
+   stb_leakcheck_malloc_info *mi = (stb_leakcheck_malloc_info *) malloc(sz + sizeof(*mi));
+   if (mi == NULL) return mi;
+   mi->file = file;
+   mi->line = line;
+   mi->next = mi_head;
+   if (mi_head)
+      mi->next->prev = mi;
+   mi->prev = NULL;
+   mi->size = (int) sz;
+   mi_head = mi;
+   return mi+1;
+}
+
+void stb_leakcheck_free(void *ptr)
+{
+   if (ptr != NULL) {
+      stb_leakcheck_malloc_info *mi = (stb_leakcheck_malloc_info *) ptr - 1;
+      mi->size = ~mi->size;
+      #ifndef STB_LEAKCHECK_SHOWALL
+      if (mi->prev == NULL) {
+         assert(mi_head == mi);
+         mi_head = mi->next;
+      } else
+         mi->prev->next = mi->next;
+      if (mi->next)
+         mi->next->prev = mi->prev;
+      free(mi);
+      #endif
+   }
+}
+
+void *stb_leakcheck_realloc(void *ptr, size_t sz, const char *file, int line)
+{
+   if (ptr == NULL) {
+      return stb_leakcheck_malloc(sz, file, line);
+   } else if (sz == 0) {
+      stb_leakcheck_free(ptr);
+      return NULL;
+   } else {
+      stb_leakcheck_malloc_info *mi = (stb_leakcheck_malloc_info *) ptr - 1;
+      if (sz <= mi->size)
+         return ptr;
+      else {
+         #ifdef STB_LEAKCHECK_REALLOC_PRESERVE_MALLOC_FILELINE
+         void *q = stb_leakcheck_malloc(sz, mi->file, mi->line);
+         #else
+         void *q = stb_leakcheck_malloc(sz, file, line);
+         #endif
+         if (q) {
+            memcpy(q, ptr, mi->size);
+            stb_leakcheck_free(ptr);
+         }
+         return q;
+      }
+   }
+}
+
+static void stblkck_internal_print(const char *reason, stb_leakcheck_malloc_info *mi)
+{
+#if defined(_MSC_VER) && _MSC_VER < 1900 // 1900=VS 2015
+   // Compilers that use the old MS C runtime library don't have %zd
+   // and the older ones don't even have %lld either... however, the old compilers
+   // without "long long" don't support 64-bit targets either, so here's the
+   // compromise:
+   #if _MSC_VER < 1400 // before VS 2005
+      fprintf(STB_LEAKCHECK_OUTPUT_PIPE, "%s: %s (%4d): %8d bytes at %p\n", reason, mi->file, mi->line, (int)mi->size, (void*)(mi+1));
+   #else
+      fprintf(STB_LEAKCHECK_OUTPUT_PIPE, "%s: %s (%4d): %16lld bytes at %p\n", reason, mi->file, mi->line, (long long)mi->size, (void*)(mi+1));
+   #endif
+#else
+   // Assume we have %zd on other targets.
+   #ifdef __MINGW32__
+      __mingw_fprintf(STB_LEAKCHECK_OUTPUT_PIPE, "%s: %s (%4d): %zd bytes at %p\n", reason, mi->file, mi->line, mi->size, (void*)(mi+1));
+   #else
+      fprintf(STB_LEAKCHECK_OUTPUT_PIPE, "%s: %s (%4d): %zd bytes at %p\n", reason, mi->file, mi->line, mi->size, (void*)(mi+1));
+   #endif
+#endif
+}
+
+void stb_leakcheck_dumpmem(void)
+{
+   stb_leakcheck_malloc_info *mi = mi_head;
+   while (mi) {
+      if ((ptrdiff_t) mi->size >= 0)
+         stblkck_internal_print("LEAKED", mi);
+      mi = mi->next;
+   }
+   #ifdef STB_LEAKCHECK_SHOWALL
+   mi = mi_head;
+   while (mi) {
+      if ((ptrdiff_t) mi->size < 0)
+         stblkck_internal_print("FREED ", mi);
+      mi = mi->next;
+   }
+   #endif
+}
+#endif // STB_LEAKCHECK_IMPLEMENTATION
+
+#if !defined(INCLUDE_STB_LEAKCHECK_H) || !defined(malloc)
+#define INCLUDE_STB_LEAKCHECK_H
+
+#include <stdlib.h> // we want to define the macros *after* stdlib to avoid a slew of errors
+
+#define malloc(sz)    stb_leakcheck_malloc(sz, __FILE__, __LINE__)
+#define free(p)       stb_leakcheck_free(p)
+#define realloc(p,sz) stb_leakcheck_realloc(p,sz, __FILE__, __LINE__)
+
+extern void * stb_leakcheck_malloc(size_t sz, const char *file, int line);
+extern void * stb_leakcheck_realloc(void *ptr, size_t sz, const char *file, int line);
+extern void   stb_leakcheck_free(void *ptr);
+extern void   stb_leakcheck_dumpmem(void);
+
+#endif // INCLUDE_STB_LEAKCHECK_H
+
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
--- /dev/null
+++ b/include-demo/stb_rect_pack.h
@@ -1,0 +1,623 @@
+// stb_rect_pack.h - v1.01 - public domain - rectangle packing
+// Sean Barrett 2014
+//
+// Useful for e.g. packing rectangular textures into an atlas.
+// Does not do rotation.
+//
+// Before #including,
+//
+//    #define STB_RECT_PACK_IMPLEMENTATION
+//
+// in the file that you want to have the implementation.
+//
+// Not necessarily the awesomest packing method, but better than
+// the totally naive one in stb_truetype (which is primarily what
+// this is meant to replace).
+//
+// Has only had a few tests run, may have issues.
+//
+// More docs to come.
+//
+// No memory allocations; uses qsort() and assert() from stdlib.
+// Can override those by defining STBRP_SORT and STBRP_ASSERT.
+//
+// This library currently uses the Skyline Bottom-Left algorithm.
+//
+// Please note: better rectangle packers are welcome! Please
+// implement them to the same API, but with a different init
+// function.
+//
+// Credits
+//
+//  Library
+//    Sean Barrett
+//  Minor features
+//    Martins Mozeiko
+//    github:IntellectualKitty
+//
+//  Bugfixes / warning fixes
+//    Jeremy Jaussaud
+//    Fabian Giesen
+//
+// Version history:
+//
+//     1.01  (2021-07-11)  always use large rect mode, expose STBRP__MAXVAL in public section
+//     1.00  (2019-02-25)  avoid small space waste; gracefully fail too-wide rectangles
+//     0.99  (2019-02-07)  warning fixes
+//     0.11  (2017-03-03)  return packing success/fail result
+//     0.10  (2016-10-25)  remove cast-away-const to avoid warnings
+//     0.09  (2016-08-27)  fix compiler warnings
+//     0.08  (2015-09-13)  really fix bug with empty rects (w=0 or h=0)
+//     0.07  (2015-09-13)  fix bug with empty rects (w=0 or h=0)
+//     0.06  (2015-04-15)  added STBRP_SORT to allow replacing qsort
+//     0.05:  added STBRP_ASSERT to allow replacing assert
+//     0.04:  fixed minor bug in STBRP_LARGE_RECTS support
+//     0.01:  initial release
+//
+// LICENSE
+//
+//   See end of file for license information.
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//       INCLUDE SECTION
+//
+
+#ifndef STB_INCLUDE_STB_RECT_PACK_H
+#define STB_INCLUDE_STB_RECT_PACK_H
+
+#define STB_RECT_PACK_VERSION  1
+
+#ifdef STBRP_STATIC
+#define STBRP_DEF static
+#else
+#define STBRP_DEF extern
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct stbrp_context stbrp_context;
+typedef struct stbrp_node    stbrp_node;
+typedef struct stbrp_rect    stbrp_rect;
+
+typedef int            stbrp_coord;
+
+#define STBRP__MAXVAL  0x7fffffff
+// Mostly for internal use, but this is the maximum supported coordinate value.
+
+STBRP_DEF int stbrp_pack_rects (stbrp_context *context, stbrp_rect *rects, int num_rects);
+// Assign packed locations to rectangles. The rectangles are of type
+// 'stbrp_rect' defined below, stored in the array 'rects', and there
+// are 'num_rects' many of them.
+//
+// Rectangles which are successfully packed have the 'was_packed' flag
+// set to a non-zero value and 'x' and 'y' store the minimum location
+// on each axis (i.e. bottom-left in cartesian coordinates, top-left
+// if you imagine y increasing downwards). Rectangles which do not fit
+// have the 'was_packed' flag set to 0.
+//
+// You should not try to access the 'rects' array from another thread
+// while this function is running, as the function temporarily reorders
+// the array while it executes.
+//
+// To pack into another rectangle, you need to call stbrp_init_target
+// again. To continue packing into the same rectangle, you can call
+// this function again. Calling this multiple times with multiple rect
+// arrays will probably produce worse packing results than calling it
+// a single time with the full rectangle array, but the option is
+// available.
+//
+// The function returns 1 if all of the rectangles were successfully
+// packed and 0 otherwise.
+
+struct stbrp_rect
+{
+   // reserved for your use:
+   int            id;
+
+   // input:
+   stbrp_coord    w, h;
+
+   // output:
+   stbrp_coord    x, y;
+   int            was_packed;  // non-zero if valid packing
+
+}; // 16 bytes, nominally
+
+
+STBRP_DEF void stbrp_init_target (stbrp_context *context, int width, int height, stbrp_node *nodes, int num_nodes);
+// Initialize a rectangle packer to:
+//    pack a rectangle that is 'width' by 'height' in dimensions
+//    using temporary storage provided by the array 'nodes', which is 'num_nodes' long
+//
+// You must call this function every time you start packing into a new target.
+//
+// There is no "shutdown" function. The 'nodes' memory must stay valid for
+// the following stbrp_pack_rects() call (or calls), but can be freed after
+// the call (or calls) finish.
+//
+// Note: to guarantee best results, either:
+//       1. make sure 'num_nodes' >= 'width'
+//   or  2. call stbrp_allow_out_of_mem() defined below with 'allow_out_of_mem = 1'
+//
+// If you don't do either of the above things, widths will be quantized to multiples
+// of small integers to guarantee the algorithm doesn't run out of temporary storage.
+//
+// If you do #2, then the non-quantized algorithm will be used, but the algorithm
+// may run out of temporary storage and be unable to pack some rectangles.
+
+STBRP_DEF void stbrp_setup_allow_out_of_mem (stbrp_context *context, int allow_out_of_mem);
+// Optionally call this function after init but before doing any packing to
+// change the handling of the out-of-temp-memory scenario, described above.
+// If you call init again, this will be reset to the default (false).
+
+
+STBRP_DEF void stbrp_setup_heuristic (stbrp_context *context, int heuristic);
+// Optionally select which packing heuristic the library should use. Different
+// heuristics will produce better/worse results for different data sets.
+// If you call init again, this will be reset to the default.
+
+enum
+{
+   STBRP_HEURISTIC_Skyline_default=0,
+   STBRP_HEURISTIC_Skyline_BL_sortHeight = STBRP_HEURISTIC_Skyline_default,
+   STBRP_HEURISTIC_Skyline_BF_sortHeight
+};
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// the details of the following structures don't matter to you, but they must
+// be visible so you can handle the memory allocations for them
+
+struct stbrp_node
+{
+   stbrp_coord  x,y;
+   stbrp_node  *next;
+};
+
+struct stbrp_context
+{
+   int width;
+   int height;
+   int align;
+   int init_mode;
+   int heuristic;
+   int num_nodes;
+   stbrp_node *active_head;
+   stbrp_node *free_head;
+   stbrp_node extra[2]; // we allocate two extra nodes so optimal user-node-count is 'width' not 'width+2'
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//     IMPLEMENTATION SECTION
+//
+
+#ifdef STB_RECT_PACK_IMPLEMENTATION
+#ifndef STBRP_SORT
+#include <stdlib.h>
+#define STBRP_SORT qsort
+#endif
+
+#ifndef STBRP_ASSERT
+#include <assert.h>
+#define STBRP_ASSERT assert
+#endif
+
+#ifdef _MSC_VER
+#define STBRP__NOTUSED(v)  (void)(v)
+#define STBRP__CDECL       __cdecl
+#else
+#define STBRP__NOTUSED(v)  (void)sizeof(v)
+#define STBRP__CDECL
+#endif
+
+enum
+{
+   STBRP__INIT_skyline = 1
+};
+
+STBRP_DEF void stbrp_setup_heuristic(stbrp_context *context, int heuristic)
+{
+   switch (context->init_mode) {
+      case STBRP__INIT_skyline:
+         STBRP_ASSERT(heuristic == STBRP_HEURISTIC_Skyline_BL_sortHeight || heuristic == STBRP_HEURISTIC_Skyline_BF_sortHeight);
+         context->heuristic = heuristic;
+         break;
+      default:
+         STBRP_ASSERT(0);
+   }
+}
+
+STBRP_DEF void stbrp_setup_allow_out_of_mem(stbrp_context *context, int allow_out_of_mem)
+{
+   if (allow_out_of_mem)
+      // if it's ok to run out of memory, then don't bother aligning them;
+      // this gives better packing, but may fail due to OOM (even though
+      // the rectangles easily fit). @TODO a smarter approach would be to only
+      // quantize once we've hit OOM, then we could get rid of this parameter.
+      context->align = 1;
+   else {
+      // if it's not ok to run out of memory, then quantize the widths
+      // so that num_nodes is always enough nodes.
+      //
+      // I.e. num_nodes * align >= width
+      //                  align >= width / num_nodes
+      //                  align = ceil(width/num_nodes)
+
+      context->align = (context->width + context->num_nodes-1) / context->num_nodes;
+   }
+}
+
+STBRP_DEF void stbrp_init_target(stbrp_context *context, int width, int height, stbrp_node *nodes, int num_nodes)
+{
+   int i;
+
+   for (i=0; i < num_nodes-1; ++i)
+      nodes[i].next = &nodes[i+1];
+   nodes[i].next = NULL;
+   context->init_mode = STBRP__INIT_skyline;
+   context->heuristic = STBRP_HEURISTIC_Skyline_default;
+   context->free_head = &nodes[0];
+   context->active_head = &context->extra[0];
+   context->width = width;
+   context->height = height;
+   context->num_nodes = num_nodes;
+   stbrp_setup_allow_out_of_mem(context, 0);
+
+   // node 0 is the full width, node 1 is the sentinel (lets us not store width explicitly)
+   context->extra[0].x = 0;
+   context->extra[0].y = 0;
+   context->extra[0].next = &context->extra[1];
+   context->extra[1].x = (stbrp_coord) width;
+   context->extra[1].y = (1<<30);
+   context->extra[1].next = NULL;
+}
+
+// find minimum y position if it starts at x1
+static int stbrp__skyline_find_min_y(stbrp_context *c, stbrp_node *first, int x0, int width, int *pwaste)
+{
+   stbrp_node *node = first;
+   int x1 = x0 + width;
+   int min_y, visited_width, waste_area;
+
+   STBRP__NOTUSED(c);
+
+   STBRP_ASSERT(first->x <= x0);
+
+   #if 0
+   // skip in case we're past the node
+   while (node->next->x <= x0)
+      ++node;
+   #else
+   STBRP_ASSERT(node->next->x > x0); // we ended up handling this in the caller for efficiency
+   #endif
+
+   STBRP_ASSERT(node->x <= x0);
+
+   min_y = 0;
+   waste_area = 0;
+   visited_width = 0;
+   while (node->x < x1) {
+      if (node->y > min_y) {
+         // raise min_y higher.
+         // we've accounted for all waste up to min_y,
+         // but we'll now add more waste for everything we've visted
+         waste_area += visited_width * (node->y - min_y);
+         min_y = node->y;
+         // the first time through, visited_width might be reduced
+         if (node->x < x0)
+            visited_width += node->next->x - x0;
+         else
+            visited_width += node->next->x - node->x;
+      } else {
+         // add waste area
+         int under_width = node->next->x - node->x;
+         if (under_width + visited_width > width)
+            under_width = width - visited_width;
+         waste_area += under_width * (min_y - node->y);
+         visited_width += under_width;
+      }
+      node = node->next;
+   }
+
+   *pwaste = waste_area;
+   return min_y;
+}
+
+typedef struct
+{
+   int x,y;
+   stbrp_node **prev_link;
+} stbrp__findresult;
+
+static stbrp__findresult stbrp__skyline_find_best_pos(stbrp_context *c, int width, int height)
+{
+   int best_waste = (1<<30), best_x, best_y = (1 << 30);
+   stbrp__findresult fr;
+   stbrp_node **prev, *node, *tail, **best = NULL;
+
+   // align to multiple of c->align
+   width = (width + c->align - 1);
+   width -= width % c->align;
+   STBRP_ASSERT(width % c->align == 0);
+
+   // if it can't possibly fit, bail immediately
+   if (width > c->width || height > c->height) {
+      fr.prev_link = NULL;
+      fr.x = fr.y = 0;
+      return fr;
+   }
+
+   node = c->active_head;
+   prev = &c->active_head;
+   while (node->x + width <= c->width) {
+      int y,waste;
+      y = stbrp__skyline_find_min_y(c, node, node->x, width, &waste);
+      if (c->heuristic == STBRP_HEURISTIC_Skyline_BL_sortHeight) { // actually just want to test BL
+         // bottom left
+         if (y < best_y) {
+            best_y = y;
+            best = prev;
+         }
+      } else {
+         // best-fit
+         if (y + height <= c->height) {
+            // can only use it if it first vertically
+            if (y < best_y || (y == best_y && waste < best_waste)) {
+               best_y = y;
+               best_waste = waste;
+               best = prev;
+            }
+         }
+      }
+      prev = &node->next;
+      node = node->next;
+   }
+
+   best_x = (best == NULL) ? 0 : (*best)->x;
+
+   // if doing best-fit (BF), we also have to try aligning right edge to each node position
+   //
+   // e.g, if fitting
+   //
+   //     ____________________
+   //    |____________________|
+   //
+   //            into
+   //
+   //   |                         |
+   //   |             ____________|
+   //   |____________|
+   //
+   // then right-aligned reduces waste, but bottom-left BL is always chooses left-aligned
+   //
+   // This makes BF take about 2x the time
+
+   if (c->heuristic == STBRP_HEURISTIC_Skyline_BF_sortHeight) {
+      tail = c->active_head;
+      node = c->active_head;
+      prev = &c->active_head;
+      // find first node that's admissible
+      while (tail->x < width)
+         tail = tail->next;
+      while (tail) {
+         int xpos = tail->x - width;
+         int y,waste;
+         STBRP_ASSERT(xpos >= 0);
+         // find the left position that matches this
+         while (node->next->x <= xpos) {
+            prev = &node->next;
+            node = node->next;
+         }
+         STBRP_ASSERT(node->next->x > xpos && node->x <= xpos);
+         y = stbrp__skyline_find_min_y(c, node, xpos, width, &waste);
+         if (y + height <= c->height) {
+            if (y <= best_y) {
+               if (y < best_y || waste < best_waste || (waste==best_waste && xpos < best_x)) {
+                  best_x = xpos;
+                  STBRP_ASSERT(y <= best_y);
+                  best_y = y;
+                  best_waste = waste;
+                  best = prev;
+               }
+            }
+         }
+         tail = tail->next;
+      }
+   }
+
+   fr.prev_link = best;
+   fr.x = best_x;
+   fr.y = best_y;
+   return fr;
+}
+
+static stbrp__findresult stbrp__skyline_pack_rectangle(stbrp_context *context, int width, int height)
+{
+   // find best position according to heuristic
+   stbrp__findresult res = stbrp__skyline_find_best_pos(context, width, height);
+   stbrp_node *node, *cur;
+
+   // bail if:
+   //    1. it failed
+   //    2. the best node doesn't fit (we don't always check this)
+   //    3. we're out of memory
+   if (res.prev_link == NULL || res.y + height > context->height || context->free_head == NULL) {
+      res.prev_link = NULL;
+      return res;
+   }
+
+   // on success, create new node
+   node = context->free_head;
+   node->x = (stbrp_coord) res.x;
+   node->y = (stbrp_coord) (res.y + height);
+
+   context->free_head = node->next;
+
+   // insert the new node into the right starting point, and
+   // let 'cur' point to the remaining nodes needing to be
+   // stiched back in
+
+   cur = *res.prev_link;
+   if (cur->x < res.x) {
+      // preserve the existing one, so start testing with the next one
+      stbrp_node *next = cur->next;
+      cur->next = node;
+      cur = next;
+   } else {
+      *res.prev_link = node;
+   }
+
+   // from here, traverse cur and free the nodes, until we get to one
+   // that shouldn't be freed
+   while (cur->next && cur->next->x <= res.x + width) {
+      stbrp_node *next = cur->next;
+      // move the current node to the free list
+      cur->next = context->free_head;
+      context->free_head = cur;
+      cur = next;
+   }
+
+   // stitch the list back in
+   node->next = cur;
+
+   if (cur->x < res.x + width)
+      cur->x = (stbrp_coord) (res.x + width);
+
+#ifdef _DEBUG
+   cur = context->active_head;
+   while (cur->x < context->width) {
+      STBRP_ASSERT(cur->x < cur->next->x);
+      cur = cur->next;
+   }
+   STBRP_ASSERT(cur->next == NULL);
+
+   {
+      int count=0;
+      cur = context->active_head;
+      while (cur) {
+         cur = cur->next;
+         ++count;
+      }
+      cur = context->free_head;
+      while (cur) {
+         cur = cur->next;
+         ++count;
+      }
+      STBRP_ASSERT(count == context->num_nodes+2);
+   }
+#endif
+
+   return res;
+}
+
+static int STBRP__CDECL rect_height_compare(const void *a, const void *b)
+{
+   const stbrp_rect *p = (const stbrp_rect *) a;
+   const stbrp_rect *q = (const stbrp_rect *) b;
+   if (p->h > q->h)
+      return -1;
+   if (p->h < q->h)
+      return  1;
+   return (p->w > q->w) ? -1 : (p->w < q->w);
+}
+
+static int STBRP__CDECL rect_original_order(const void *a, const void *b)
+{
+   const stbrp_rect *p = (const stbrp_rect *) a;
+   const stbrp_rect *q = (const stbrp_rect *) b;
+   return (p->was_packed < q->was_packed) ? -1 : (p->was_packed > q->was_packed);
+}
+
+STBRP_DEF int stbrp_pack_rects(stbrp_context *context, stbrp_rect *rects, int num_rects)
+{
+   int i, all_rects_packed = 1;
+
+   // we use the 'was_packed' field internally to allow sorting/unsorting
+   for (i=0; i < num_rects; ++i) {
+      rects[i].was_packed = i;
+   }
+
+   // sort according to heuristic
+   STBRP_SORT(rects, num_rects, sizeof(rects[0]), rect_height_compare);
+
+   for (i=0; i < num_rects; ++i) {
+      if (rects[i].w == 0 || rects[i].h == 0) {
+         rects[i].x = rects[i].y = 0;  // empty rect needs no space
+      } else {
+         stbrp__findresult fr = stbrp__skyline_pack_rectangle(context, rects[i].w, rects[i].h);
+         if (fr.prev_link) {
+            rects[i].x = (stbrp_coord) fr.x;
+            rects[i].y = (stbrp_coord) fr.y;
+         } else {
+            rects[i].x = rects[i].y = STBRP__MAXVAL;
+         }
+      }
+   }
+
+   // unsort
+   STBRP_SORT(rects, num_rects, sizeof(rects[0]), rect_original_order);
+
+   // set was_packed flags and all_rects_packed status
+   for (i=0; i < num_rects; ++i) {
+      rects[i].was_packed = !(rects[i].x == STBRP__MAXVAL && rects[i].y == STBRP__MAXVAL);
+      if (!rects[i].was_packed)
+         all_rects_packed = 0;
+   }
+
+   // return the all_rects_packed status
+   return all_rects_packed;
+}
+#endif
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
--- /dev/null
+++ b/include-demo/stb_sprintf.h
@@ -1,0 +1,1906 @@
+// stb_sprintf - v1.10 - public domain snprintf() implementation
+// originally by Jeff Roberts / RAD Game Tools, 2015/10/20
+// http://github.com/nothings/stb
+//
+// allowed types:  sc uidBboXx p AaGgEef n
+// lengths      :  hh h ll j z t I64 I32 I
+//
+// Contributors:
+//    Fabian "ryg" Giesen (reformatting)
+//    github:aganm (attribute format)
+//
+// Contributors (bugfixes):
+//    github:d26435
+//    github:trex78
+//    github:account-login
+//    Jari Komppa (SI suffixes)
+//    Rohit Nirmal
+//    Marcin Wojdyr
+//    Leonard Ritter
+//    Stefano Zanotti
+//    Adam Allison
+//    Arvid Gerstmann
+//    Markus Kolb
+//
+// LICENSE:
+//
+//   See end of file for license information.
+
+#ifndef STB_SPRINTF_H_INCLUDE
+#define STB_SPRINTF_H_INCLUDE
+
+/*
+Single file sprintf replacement.
+
+Originally written by Jeff Roberts at RAD Game Tools - 2015/10/20.
+Hereby placed in public domain.
+
+This is a full sprintf replacement that supports everything that
+the C runtime sprintfs support, including float/double, 64-bit integers,
+hex floats, field parameters (%*.*d stuff), length reads backs, etc.
+
+Why would you need this if sprintf already exists?  Well, first off,
+it's *much* faster (see below). It's also much smaller than the CRT
+versions code-space-wise. We've also added some simple improvements
+that are super handy (commas in thousands, callbacks at buffer full,
+for example). Finally, the format strings for MSVC and GCC differ
+for 64-bit integers (among other small things), so this lets you use
+the same format strings in cross platform code.
+
+It uses the standard single file trick of being both the header file
+and the source itself. If you just include it normally, you just get
+the header file function definitions. To get the code, you include
+it from a C or C++ file and define STB_SPRINTF_IMPLEMENTATION first.
+
+It only uses va_args macros from the C runtime to do it's work. It
+does cast doubles to S64s and shifts and divides U64s, which does
+drag in CRT code on most platforms.
+
+It compiles to roughly 8K with float support, and 4K without.
+As a comparison, when using MSVC static libs, calling sprintf drags
+in 16K.
+
+API:
+====
+int stbsp_sprintf( char * buf, char const * fmt, ... )
+int stbsp_snprintf( char * buf, int count, char const * fmt, ... )
+  Convert an arg list into a buffer.  stbsp_snprintf always returns
+  a zero-terminated string (unlike regular snprintf).
+
+int stbsp_vsprintf( char * buf, char const * fmt, va_list va )
+int stbsp_vsnprintf( char * buf, int count, char const * fmt, va_list va )
+  Convert a va_list arg list into a buffer.  stbsp_vsnprintf always returns
+  a zero-terminated string (unlike regular snprintf).
+
+int stbsp_vsprintfcb( STBSP_SPRINTFCB * callback, void * user, char * buf, char const * fmt, va_list va )
+    typedef char * STBSP_SPRINTFCB( char const * buf, void * user, int len );
+  Convert into a buffer, calling back every STB_SPRINTF_MIN chars.
+  Your callback can then copy the chars out, print them or whatever.
+  This function is actually the workhorse for everything else.
+  The buffer you pass in must hold at least STB_SPRINTF_MIN characters.
+    // you return the next buffer to use or 0 to stop converting
+
+void stbsp_set_separators( char comma, char period )
+  Set the comma and period characters to use.
+
+FLOATS/DOUBLES:
+===============
+This code uses a internal float->ascii conversion method that uses
+doubles with error correction (double-doubles, for ~105 bits of
+precision).  This conversion is round-trip perfect - that is, an atof
+of the values output here will give you the bit-exact double back.
+
+One difference is that our insignificant digits will be different than
+with MSVC or GCC (but they don't match each other either).  We also
+don't attempt to find the minimum length matching float (pre-MSVC15
+doesn't either).
+
+If you don't need float or doubles at all, define STB_SPRINTF_NOFLOAT
+and you'll save 4K of code space.
+
+64-BIT INTS:
+============
+This library also supports 64-bit integers and you can use MSVC style or
+GCC style indicators (%I64d or %lld).  It supports the C99 specifiers
+for size_t and ptr_diff_t (%jd %zd) as well.
+
+EXTRAS:
+=======
+Like some GCCs, for integers and floats, you can use a ' (single quote)
+specifier and commas will be inserted on the thousands: "%'d" on 12345
+would print 12,345.
+
+For integers and floats, you can use a "$" specifier and the number
+will be converted to float and then divided to get kilo, mega, giga or
+tera and then printed, so "%$d" 1000 is "1.0 k", "%$.2d" 2536000 is
+"2.53 M", etc. For byte values, use two $:s, like "%$$d" to turn
+2536000 to "2.42 Mi". If you prefer JEDEC suffixes to SI ones, use three
+$:s: "%$$$d" -> "2.42 M". To remove the space between the number and the
+suffix, add "_" specifier: "%_$d" -> "2.53M".
+
+In addition to octal and hexadecimal conversions, you can print
+integers in binary: "%b" for 256 would print 100.
+
+PERFORMANCE vs MSVC 2008 32-/64-bit (GCC is even slower than MSVC):
+===================================================================
+"%d" across all 32-bit ints (4.8x/4.0x faster than 32-/64-bit MSVC)
+"%24d" across all 32-bit ints (4.5x/4.2x faster)
+"%x" across all 32-bit ints (4.5x/3.8x faster)
+"%08x" across all 32-bit ints (4.3x/3.8x faster)
+"%f" across e-10 to e+10 floats (7.3x/6.0x faster)
+"%e" across e-10 to e+10 floats (8.1x/6.0x faster)
+"%g" across e-10 to e+10 floats (10.0x/7.1x faster)
+"%f" for values near e-300 (7.9x/6.5x faster)
+"%f" for values near e+300 (10.0x/9.1x faster)
+"%e" for values near e-300 (10.1x/7.0x faster)
+"%e" for values near e+300 (9.2x/6.0x faster)
+"%.320f" for values near e-300 (12.6x/11.2x faster)
+"%a" for random values (8.6x/4.3x faster)
+"%I64d" for 64-bits with 32-bit values (4.8x/3.4x faster)
+"%I64d" for 64-bits > 32-bit values (4.9x/5.5x faster)
+"%s%s%s" for 64 char strings (7.1x/7.3x faster)
+"...512 char string..." ( 35.0x/32.5x faster!)
+*/
+
+#if defined(__clang__)
+ #if defined(__has_feature) && defined(__has_attribute)
+  #if __has_feature(address_sanitizer)
+   #if __has_attribute(__no_sanitize__)
+    #define STBSP__ASAN __attribute__((__no_sanitize__("address")))
+   #elif __has_attribute(__no_sanitize_address__)
+    #define STBSP__ASAN __attribute__((__no_sanitize_address__))
+   #elif __has_attribute(__no_address_safety_analysis__)
+    #define STBSP__ASAN __attribute__((__no_address_safety_analysis__))
+   #endif
+  #endif
+ #endif
+#elif defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
+ #if defined(__SANITIZE_ADDRESS__) && __SANITIZE_ADDRESS__
+  #define STBSP__ASAN __attribute__((__no_sanitize_address__))
+ #endif
+#endif
+
+#ifndef STBSP__ASAN
+#define STBSP__ASAN
+#endif
+
+#ifdef STB_SPRINTF_STATIC
+#define STBSP__PUBLICDEC static
+#define STBSP__PUBLICDEF static STBSP__ASAN
+#else
+#ifdef __cplusplus
+#define STBSP__PUBLICDEC extern "C"
+#define STBSP__PUBLICDEF extern "C" STBSP__ASAN
+#else
+#define STBSP__PUBLICDEC extern
+#define STBSP__PUBLICDEF STBSP__ASAN
+#endif
+#endif
+
+#if defined(__has_attribute)
+ #if __has_attribute(format)
+   #define STBSP__ATTRIBUTE_FORMAT(fmt,va) __attribute__((format(printf,fmt,va)))
+ #endif
+#endif
+
+#ifndef STBSP__ATTRIBUTE_FORMAT
+#define STBSP__ATTRIBUTE_FORMAT(fmt,va)
+#endif
+
+#ifdef _MSC_VER
+#define STBSP__NOTUSED(v)  (void)(v)
+#else
+#define STBSP__NOTUSED(v)  (void)sizeof(v)
+#endif
+
+#include <stdarg.h> // for va_arg(), va_list()
+#include <stddef.h> // size_t, ptrdiff_t
+
+#ifndef STB_SPRINTF_MIN
+#define STB_SPRINTF_MIN 512 // how many characters per callback
+#endif
+typedef char *STBSP_SPRINTFCB(const char *buf, void *user, int len);
+
+#ifndef STB_SPRINTF_DECORATE
+#define STB_SPRINTF_DECORATE(name) stbsp_##name // define this before including if you want to change the names
+#endif
+
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsprintf)(char *buf, char const *fmt, va_list va);
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsnprintf)(char *buf, int count, char const *fmt, va_list va);
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(sprintf)(char *buf, char const *fmt, ...) STBSP__ATTRIBUTE_FORMAT(2,3);
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(snprintf)(char *buf, int count, char const *fmt, ...) STBSP__ATTRIBUTE_FORMAT(3,4);
+
+STBSP__PUBLICDEC int STB_SPRINTF_DECORATE(vsprintfcb)(STBSP_SPRINTFCB *callback, void *user, char *buf, char const *fmt, va_list va);
+STBSP__PUBLICDEC void STB_SPRINTF_DECORATE(set_separators)(char comma, char period);
+
+#endif // STB_SPRINTF_H_INCLUDE
+
+#ifdef STB_SPRINTF_IMPLEMENTATION
+
+#define stbsp__uint32 unsigned int
+#define stbsp__int32 signed int
+
+#ifdef _MSC_VER
+#define stbsp__uint64 unsigned __int64
+#define stbsp__int64 signed __int64
+#else
+#define stbsp__uint64 unsigned long long
+#define stbsp__int64 signed long long
+#endif
+#define stbsp__uint16 unsigned short
+
+#ifndef stbsp__uintptr
+#if defined(__ppc64__) || defined(__powerpc64__) || defined(__aarch64__) || defined(_M_X64) || defined(__x86_64__) || defined(__x86_64) || defined(__s390x__)
+#define stbsp__uintptr stbsp__uint64
+#else
+#define stbsp__uintptr stbsp__uint32
+#endif
+#endif
+
+#ifndef STB_SPRINTF_MSVC_MODE // used for MSVC2013 and earlier (MSVC2015 matches GCC)
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+#define STB_SPRINTF_MSVC_MODE
+#endif
+#endif
+
+#ifdef STB_SPRINTF_NOUNALIGNED // define this before inclusion to force stbsp_sprintf to always use aligned accesses
+#define STBSP__UNALIGNED(code)
+#else
+#define STBSP__UNALIGNED(code) code
+#endif
+
+#ifndef STB_SPRINTF_NOFLOAT
+// internal float utility functions
+static stbsp__int32 stbsp__real_to_str(char const **start, stbsp__uint32 *len, char *out, stbsp__int32 *decimal_pos, double value, stbsp__uint32 frac_digits);
+static stbsp__int32 stbsp__real_to_parts(stbsp__int64 *bits, stbsp__int32 *expo, double value);
+#define STBSP__SPECIAL 0x7000
+#endif
+
+static char stbsp__period = '.';
+static char stbsp__comma = ',';
+static struct
+{
+   short temp; // force next field to be 2-byte aligned
+   char pair[201];
+} stbsp__digitpair =
+{
+  0,
+   "00010203040506070809101112131415161718192021222324"
+   "25262728293031323334353637383940414243444546474849"
+   "50515253545556575859606162636465666768697071727374"
+   "75767778798081828384858687888990919293949596979899"
+};
+
+STBSP__PUBLICDEF void STB_SPRINTF_DECORATE(set_separators)(char pcomma, char pperiod)
+{
+   stbsp__period = pperiod;
+   stbsp__comma = pcomma;
+}
+
+#define STBSP__LEFTJUST 1
+#define STBSP__LEADINGPLUS 2
+#define STBSP__LEADINGSPACE 4
+#define STBSP__LEADING_0X 8
+#define STBSP__LEADINGZERO 16
+#define STBSP__INTMAX 32
+#define STBSP__TRIPLET_COMMA 64
+#define STBSP__NEGATIVE 128
+#define STBSP__METRIC_SUFFIX 256
+#define STBSP__HALFWIDTH 512
+#define STBSP__METRIC_NOSPACE 1024
+#define STBSP__METRIC_1024 2048
+#define STBSP__METRIC_JEDEC 4096
+
+static void stbsp__lead_sign(stbsp__uint32 fl, char *sign)
+{
+   sign[0] = 0;
+   if (fl & STBSP__NEGATIVE) {
+      sign[0] = 1;
+      sign[1] = '-';
+   } else if (fl & STBSP__LEADINGSPACE) {
+      sign[0] = 1;
+      sign[1] = ' ';
+   } else if (fl & STBSP__LEADINGPLUS) {
+      sign[0] = 1;
+      sign[1] = '+';
+   }
+}
+
+static STBSP__ASAN stbsp__uint32 stbsp__strlen_limited(char const *s, stbsp__uint32 limit)
+{
+   char const * sn = s;
+
+   // get up to 4-byte alignment
+   for (;;) {
+      if (((stbsp__uintptr)sn & 3) == 0)
+         break;
+
+      if (!limit || *sn == 0)
+         return (stbsp__uint32)(sn - s);
+
+      ++sn;
+      --limit;
+   }
+
+   // scan over 4 bytes at a time to find terminating 0
+   // this will intentionally scan up to 3 bytes past the end of buffers,
+   // but becase it works 4B aligned, it will never cross page boundaries
+   // (hence the STBSP__ASAN markup; the over-read here is intentional
+   // and harmless)
+   while (limit >= 4) {
+      stbsp__uint32 v = *(stbsp__uint32 *)sn;
+      // bit hack to find if there's a 0 byte in there
+      if ((v - 0x01010101) & (~v) & 0x80808080UL)
+         break;
+
+      sn += 4;
+      limit -= 4;
+   }
+
+   // handle the last few characters to find actual size
+   while (limit && *sn) {
+      ++sn;
+      --limit;
+   }
+
+   return (stbsp__uint32)(sn - s);
+}
+
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(vsprintfcb)(STBSP_SPRINTFCB *callback, void *user, char *buf, char const *fmt, va_list va)
+{
+   static char hex[] = "0123456789abcdefxp";
+   static char hexu[] = "0123456789ABCDEFXP";
+   char *bf;
+   char const *f;
+   int tlen = 0;
+
+   bf = buf;
+   f = fmt;
+   for (;;) {
+      stbsp__int32 fw, pr, tz;
+      stbsp__uint32 fl;
+
+      // macros for the callback buffer stuff
+      #define stbsp__chk_cb_bufL(bytes)                        \
+         {                                                     \
+            int len = (int)(bf - buf);                         \
+            if ((len + (bytes)) >= STB_SPRINTF_MIN) {          \
+               tlen += len;                                    \
+               if (0 == (bf = buf = callback(buf, user, len))) \
+                  goto done;                                   \
+            }                                                  \
+         }
+      #define stbsp__chk_cb_buf(bytes)    \
+         {                                \
+            if (callback) {               \
+               stbsp__chk_cb_bufL(bytes); \
+            }                             \
+         }
+      #define stbsp__flush_cb()                      \
+         {                                           \
+            stbsp__chk_cb_bufL(STB_SPRINTF_MIN - 1); \
+         } // flush if there is even one byte in the buffer
+      #define stbsp__cb_buf_clamp(cl, v)                \
+         cl = v;                                        \
+         if (callback) {                                \
+            int lg = STB_SPRINTF_MIN - (int)(bf - buf); \
+            if (cl > lg)                                \
+               cl = lg;                                 \
+         }
+
+      // fast copy everything up to the next % (or end of string)
+      for (;;) {
+         while (((stbsp__uintptr)f) & 3) {
+         schk1:
+            if (f[0] == '%')
+               goto scandd;
+         schk2:
+            if (f[0] == 0)
+               goto endfmt;
+            stbsp__chk_cb_buf(1);
+            *bf++ = f[0];
+            ++f;
+         }
+         for (;;) {
+            // Check if the next 4 bytes contain %(0x25) or end of string.
+            // Using the 'hasless' trick:
+            // https://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord
+            stbsp__uint32 v, c;
+            v = *(stbsp__uint32 *)f;
+            c = (~v) & 0x80808080;
+            if (((v ^ 0x25252525) - 0x01010101) & c)
+               goto schk1;
+            if ((v - 0x01010101) & c)
+               goto schk2;
+            if (callback)
+               if ((STB_SPRINTF_MIN - (int)(bf - buf)) < 4)
+                  goto schk1;
+            #ifdef STB_SPRINTF_NOUNALIGNED
+                if(((stbsp__uintptr)bf) & 3) {
+                    bf[0] = f[0];
+                    bf[1] = f[1];
+                    bf[2] = f[2];
+                    bf[3] = f[3];
+                } else
+            #endif
+            {
+                *(stbsp__uint32 *)bf = v;
+            }
+            bf += 4;
+            f += 4;
+         }
+      }
+   scandd:
+
+      ++f;
+
+      // ok, we have a percent, read the modifiers first
+      fw = 0;
+      pr = -1;
+      fl = 0;
+      tz = 0;
+
+      // flags
+      for (;;) {
+         switch (f[0]) {
+         // if we have left justify
+         case '-':
+            fl |= STBSP__LEFTJUST;
+            ++f;
+            continue;
+         // if we have leading plus
+         case '+':
+            fl |= STBSP__LEADINGPLUS;
+            ++f;
+            continue;
+         // if we have leading space
+         case ' ':
+            fl |= STBSP__LEADINGSPACE;
+            ++f;
+            continue;
+         // if we have leading 0x
+         case '#':
+            fl |= STBSP__LEADING_0X;
+            ++f;
+            continue;
+         // if we have thousand commas
+         case '\'':
+            fl |= STBSP__TRIPLET_COMMA;
+            ++f;
+            continue;
+         // if we have kilo marker (none->kilo->kibi->jedec)
+         case '$':
+            if (fl & STBSP__METRIC_SUFFIX) {
+               if (fl & STBSP__METRIC_1024) {
+                  fl |= STBSP__METRIC_JEDEC;
+               } else {
+                  fl |= STBSP__METRIC_1024;
+               }
+            } else {
+               fl |= STBSP__METRIC_SUFFIX;
+            }
+            ++f;
+            continue;
+         // if we don't want space between metric suffix and number
+         case '_':
+            fl |= STBSP__METRIC_NOSPACE;
+            ++f;
+            continue;
+         // if we have leading zero
+         case '0':
+            fl |= STBSP__LEADINGZERO;
+            ++f;
+            goto flags_done;
+         default: goto flags_done;
+         }
+      }
+   flags_done:
+
+      // get the field width
+      if (f[0] == '*') {
+         fw = va_arg(va, stbsp__uint32);
+         ++f;
+      } else {
+         while ((f[0] >= '0') && (f[0] <= '9')) {
+            fw = fw * 10 + f[0] - '0';
+            f++;
+         }
+      }
+      // get the precision
+      if (f[0] == '.') {
+         ++f;
+         if (f[0] == '*') {
+            pr = va_arg(va, stbsp__uint32);
+            ++f;
+         } else {
+            pr = 0;
+            while ((f[0] >= '0') && (f[0] <= '9')) {
+               pr = pr * 10 + f[0] - '0';
+               f++;
+            }
+         }
+      }
+
+      // handle integer size overrides
+      switch (f[0]) {
+      // are we halfwidth?
+      case 'h':
+         fl |= STBSP__HALFWIDTH;
+         ++f;
+         if (f[0] == 'h')
+            ++f;  // QUARTERWIDTH
+         break;
+      // are we 64-bit (unix style)
+      case 'l':
+         fl |= ((sizeof(long) == 8) ? STBSP__INTMAX : 0);
+         ++f;
+         if (f[0] == 'l') {
+            fl |= STBSP__INTMAX;
+            ++f;
+         }
+         break;
+      // are we 64-bit on intmax? (c99)
+      case 'j':
+         fl |= (sizeof(size_t) == 8) ? STBSP__INTMAX : 0;
+         ++f;
+         break;
+      // are we 64-bit on size_t or ptrdiff_t? (c99)
+      case 'z':
+         fl |= (sizeof(ptrdiff_t) == 8) ? STBSP__INTMAX : 0;
+         ++f;
+         break;
+      case 't':
+         fl |= (sizeof(ptrdiff_t) == 8) ? STBSP__INTMAX : 0;
+         ++f;
+         break;
+      // are we 64-bit (msft style)
+      case 'I':
+         if ((f[1] == '6') && (f[2] == '4')) {
+            fl |= STBSP__INTMAX;
+            f += 3;
+         } else if ((f[1] == '3') && (f[2] == '2')) {
+            f += 3;
+         } else {
+            fl |= ((sizeof(void *) == 8) ? STBSP__INTMAX : 0);
+            ++f;
+         }
+         break;
+      default: break;
+      }
+
+      // handle each replacement
+      switch (f[0]) {
+         #define STBSP__NUMSZ 512 // big enough for e308 (with commas) or e-307
+         char num[STBSP__NUMSZ];
+         char lead[8];
+         char tail[8];
+         char *s;
+         char const *h;
+         stbsp__uint32 l, n, cs;
+         stbsp__uint64 n64;
+#ifndef STB_SPRINTF_NOFLOAT
+         double fv;
+#endif
+         stbsp__int32 dp;
+         char const *sn;
+
+      case 's':
+         // get the string
+         s = va_arg(va, char *);
+         if (s == 0)
+            s = (char *)"null";
+         // get the length, limited to desired precision
+         // always limit to ~0u chars since our counts are 32b
+         l = stbsp__strlen_limited(s, (pr >= 0) ? pr : ~0u);
+         lead[0] = 0;
+         tail[0] = 0;
+         pr = 0;
+         dp = 0;
+         cs = 0;
+         // copy the string in
+         goto scopy;
+
+      case 'c': // char
+         // get the character
+         s = num + STBSP__NUMSZ - 1;
+         *s = (char)va_arg(va, int);
+         l = 1;
+         lead[0] = 0;
+         tail[0] = 0;
+         pr = 0;
+         dp = 0;
+         cs = 0;
+         goto scopy;
+
+      case 'n': // weird write-bytes specifier
+      {
+         int *d = va_arg(va, int *);
+         *d = tlen + (int)(bf - buf);
+      } break;
+
+#ifdef STB_SPRINTF_NOFLOAT
+      case 'A':              // float
+      case 'a':              // hex float
+      case 'G':              // float
+      case 'g':              // float
+      case 'E':              // float
+      case 'e':              // float
+      case 'f':              // float
+         va_arg(va, double); // eat it
+         s = (char *)"No float";
+         l = 8;
+         lead[0] = 0;
+         tail[0] = 0;
+         pr = 0;
+         cs = 0;
+         STBSP__NOTUSED(dp);
+         goto scopy;
+#else
+      case 'A': // hex float
+      case 'a': // hex float
+         h = (f[0] == 'A') ? hexu : hex;
+         fv = va_arg(va, double);
+         if (pr == -1)
+            pr = 6; // default is 6
+         // read the double into a string
+         if (stbsp__real_to_parts((stbsp__int64 *)&n64, &dp, fv))
+            fl |= STBSP__NEGATIVE;
+
+         s = num + 64;
+
+         stbsp__lead_sign(fl, lead);
+
+         if (dp == -1023)
+            dp = (n64) ? -1022 : 0;
+         else
+            n64 |= (((stbsp__uint64)1) << 52);
+         n64 <<= (64 - 56);
+         if (pr < 15)
+            n64 += ((((stbsp__uint64)8) << 56) >> (pr * 4));
+// add leading chars
+
+#ifdef STB_SPRINTF_MSVC_MODE
+         *s++ = '0';
+         *s++ = 'x';
+#else
+         lead[1 + lead[0]] = '0';
+         lead[2 + lead[0]] = 'x';
+         lead[0] += 2;
+#endif
+         *s++ = h[(n64 >> 60) & 15];
+         n64 <<= 4;
+         if (pr)
+            *s++ = stbsp__period;
+         sn = s;
+
+         // print the bits
+         n = pr;
+         if (n > 13)
+            n = 13;
+         if (pr > (stbsp__int32)n)
+            tz = pr - n;
+         pr = 0;
+         while (n--) {
+            *s++ = h[(n64 >> 60) & 15];
+            n64 <<= 4;
+         }
+
+         // print the expo
+         tail[1] = h[17];
+         if (dp < 0) {
+            tail[2] = '-';
+            dp = -dp;
+         } else
+            tail[2] = '+';
+         n = (dp >= 1000) ? 6 : ((dp >= 100) ? 5 : ((dp >= 10) ? 4 : 3));
+         tail[0] = (char)n;
+         for (;;) {
+            tail[n] = '0' + dp % 10;
+            if (n <= 3)
+               break;
+            --n;
+            dp /= 10;
+         }
+
+         dp = (int)(s - sn);
+         l = (int)(s - (num + 64));
+         s = num + 64;
+         cs = 1 + (3 << 24);
+         goto scopy;
+
+      case 'G': // float
+      case 'g': // float
+         h = (f[0] == 'G') ? hexu : hex;
+         fv = va_arg(va, double);
+         if (pr == -1)
+            pr = 6;
+         else if (pr == 0)
+            pr = 1; // default is 6
+         // read the double into a string
+         if (stbsp__real_to_str(&sn, &l, num, &dp, fv, (pr - 1) | 0x80000000))
+            fl |= STBSP__NEGATIVE;
+
+         // clamp the precision and delete extra zeros after clamp
+         n = pr;
+         if (l > (stbsp__uint32)pr)
+            l = pr;
+         while ((l > 1) && (pr) && (sn[l - 1] == '0')) {
+            --pr;
+            --l;
+         }
+
+         // should we use %e
+         if ((dp <= -4) || (dp > (stbsp__int32)n)) {
+            if (pr > (stbsp__int32)l)
+               pr = l - 1;
+            else if (pr)
+               --pr; // when using %e, there is one digit before the decimal
+            goto doexpfromg;
+         }
+         // this is the insane action to get the pr to match %g semantics for %f
+         if (dp > 0) {
+            pr = (dp < (stbsp__int32)l) ? l - dp : 0;
+         } else {
+            pr = -dp + ((pr > (stbsp__int32)l) ? (stbsp__int32) l : pr);
+         }
+         goto dofloatfromg;
+
+      case 'E': // float
+      case 'e': // float
+         h = (f[0] == 'E') ? hexu : hex;
+         fv = va_arg(va, double);
+         if (pr == -1)
+            pr = 6; // default is 6
+         // read the double into a string
+         if (stbsp__real_to_str(&sn, &l, num, &dp, fv, pr | 0x80000000))
+            fl |= STBSP__NEGATIVE;
+      doexpfromg:
+         tail[0] = 0;
+         stbsp__lead_sign(fl, lead);
+         if (dp == STBSP__SPECIAL) {
+            s = (char *)sn;
+            cs = 0;
+            pr = 0;
+            goto scopy;
+         }
+         s = num + 64;
+         // handle leading chars
+         *s++ = sn[0];
+
+         if (pr)
+            *s++ = stbsp__period;
+
+         // handle after decimal
+         if ((l - 1) > (stbsp__uint32)pr)
+            l = pr + 1;
+         for (n = 1; n < l; n++)
+            *s++ = sn[n];
+         // trailing zeros
+         tz = pr - (l - 1);
+         pr = 0;
+         // dump expo
+         tail[1] = h[0xe];
+         dp -= 1;
+         if (dp < 0) {
+            tail[2] = '-';
+            dp = -dp;
+         } else
+            tail[2] = '+';
+#ifdef STB_SPRINTF_MSVC_MODE
+         n = 5;
+#else
+         n = (dp >= 100) ? 5 : 4;
+#endif
+         tail[0] = (char)n;
+         for (;;) {
+            tail[n] = '0' + dp % 10;
+            if (n <= 3)
+               break;
+            --n;
+            dp /= 10;
+         }
+         cs = 1 + (3 << 24); // how many tens
+         goto flt_lead;
+
+      case 'f': // float
+         fv = va_arg(va, double);
+      doafloat:
+         // do kilos
+         if (fl & STBSP__METRIC_SUFFIX) {
+            double divisor;
+            divisor = 1000.0f;
+            if (fl & STBSP__METRIC_1024)
+               divisor = 1024.0;
+            while (fl < 0x4000000) {
+               if ((fv < divisor) && (fv > -divisor))
+                  break;
+               fv /= divisor;
+               fl += 0x1000000;
+            }
+         }
+         if (pr == -1)
+            pr = 6; // default is 6
+         // read the double into a string
+         if (stbsp__real_to_str(&sn, &l, num, &dp, fv, pr))
+            fl |= STBSP__NEGATIVE;
+      dofloatfromg:
+         tail[0] = 0;
+         stbsp__lead_sign(fl, lead);
+         if (dp == STBSP__SPECIAL) {
+            s = (char *)sn;
+            cs = 0;
+            pr = 0;
+            goto scopy;
+         }
+         s = num + 64;
+
+         // handle the three decimal varieties
+         if (dp <= 0) {
+            stbsp__int32 i;
+            // handle 0.000*000xxxx
+            *s++ = '0';
+            if (pr)
+               *s++ = stbsp__period;
+            n = -dp;
+            if ((stbsp__int32)n > pr)
+               n = pr;
+            i = n;
+            while (i) {
+               if ((((stbsp__uintptr)s) & 3) == 0)
+                  break;
+               *s++ = '0';
+               --i;
+            }
+            while (i >= 4) {
+               *(stbsp__uint32 *)s = 0x30303030;
+               s += 4;
+               i -= 4;
+            }
+            while (i) {
+               *s++ = '0';
+               --i;
+            }
+            if ((stbsp__int32)(l + n) > pr)
+               l = pr - n;
+            i = l;
+            while (i) {
+               *s++ = *sn++;
+               --i;
+            }
+            tz = pr - (n + l);
+            cs = 1 + (3 << 24); // how many tens did we write (for commas below)
+         } else {
+            cs = (fl & STBSP__TRIPLET_COMMA) ? ((600 - (stbsp__uint32)dp) % 3) : 0;
+            if ((stbsp__uint32)dp >= l) {
+               // handle xxxx000*000.0
+               n = 0;
+               for (;;) {
+                  if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) {
+                     cs = 0;
+                     *s++ = stbsp__comma;
+                  } else {
+                     *s++ = sn[n];
+                     ++n;
+                     if (n >= l)
+                        break;
+                  }
+               }
+               if (n < (stbsp__uint32)dp) {
+                  n = dp - n;
+                  if ((fl & STBSP__TRIPLET_COMMA) == 0) {
+                     while (n) {
+                        if ((((stbsp__uintptr)s) & 3) == 0)
+                           break;
+                        *s++ = '0';
+                        --n;
+                     }
+                     while (n >= 4) {
+                        *(stbsp__uint32 *)s = 0x30303030;
+                        s += 4;
+                        n -= 4;
+                     }
+                  }
+                  while (n) {
+                     if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) {
+                        cs = 0;
+                        *s++ = stbsp__comma;
+                     } else {
+                        *s++ = '0';
+                        --n;
+                     }
+                  }
+               }
+               cs = (int)(s - (num + 64)) + (3 << 24); // cs is how many tens
+               if (pr) {
+                  *s++ = stbsp__period;
+                  tz = pr;
+               }
+            } else {
+               // handle xxxxx.xxxx000*000
+               n = 0;
+               for (;;) {
+                  if ((fl & STBSP__TRIPLET_COMMA) && (++cs == 4)) {
+                     cs = 0;
+                     *s++ = stbsp__comma;
+                  } else {
+                     *s++ = sn[n];
+                     ++n;
+                     if (n >= (stbsp__uint32)dp)
+                        break;
+                  }
+               }
+               cs = (int)(s - (num + 64)) + (3 << 24); // cs is how many tens
+               if (pr)
+                  *s++ = stbsp__period;
+               if ((l - dp) > (stbsp__uint32)pr)
+                  l = pr + dp;
+               while (n < l) {
+                  *s++ = sn[n];
+                  ++n;
+               }
+               tz = pr - (l - dp);
+            }
+         }
+         pr = 0;
+
+         // handle k,m,g,t
+         if (fl & STBSP__METRIC_SUFFIX) {
+            char idx;
+            idx = 1;
+            if (fl & STBSP__METRIC_NOSPACE)
+               idx = 0;
+            tail[0] = idx;
+            tail[1] = ' ';
+            {
+               if (fl >> 24) { // SI kilo is 'k', JEDEC and SI kibits are 'K'.
+                  if (fl & STBSP__METRIC_1024)
+                     tail[idx + 1] = "_KMGT"[fl >> 24];
+                  else
+                     tail[idx + 1] = "_kMGT"[fl >> 24];
+                  idx++;
+                  // If printing kibits and not in jedec, add the 'i'.
+                  if (fl & STBSP__METRIC_1024 && !(fl & STBSP__METRIC_JEDEC)) {
+                     tail[idx + 1] = 'i';
+                     idx++;
+                  }
+                  tail[0] = idx;
+               }
+            }
+         };
+
+      flt_lead:
+         // get the length that we copied
+         l = (stbsp__uint32)(s - (num + 64));
+         s = num + 64;
+         goto scopy;
+#endif
+
+      case 'B': // upper binary
+      case 'b': // lower binary
+         h = (f[0] == 'B') ? hexu : hex;
+         lead[0] = 0;
+         if (fl & STBSP__LEADING_0X) {
+            lead[0] = 2;
+            lead[1] = '0';
+            lead[2] = h[0xb];
+         }
+         l = (8 << 4) | (1 << 8);
+         goto radixnum;
+
+      case 'o': // octal
+         h = hexu;
+         lead[0] = 0;
+         if (fl & STBSP__LEADING_0X) {
+            lead[0] = 1;
+            lead[1] = '0';
+         }
+         l = (3 << 4) | (3 << 8);
+         goto radixnum;
+
+      case 'p': // pointer
+         fl |= (sizeof(void *) == 8) ? STBSP__INTMAX : 0;
+         pr = sizeof(void *) * 2;
+         fl &= ~STBSP__LEADINGZERO; // 'p' only prints the pointer with zeros
+                                    // fall through - to X
+
+      case 'X': // upper hex
+      case 'x': // lower hex
+         h = (f[0] == 'X') ? hexu : hex;
+         l = (4 << 4) | (4 << 8);
+         lead[0] = 0;
+         if (fl & STBSP__LEADING_0X) {
+            lead[0] = 2;
+            lead[1] = '0';
+            lead[2] = h[16];
+         }
+      radixnum:
+         // get the number
+         if (fl & STBSP__INTMAX)
+            n64 = va_arg(va, stbsp__uint64);
+         else
+            n64 = va_arg(va, stbsp__uint32);
+
+         s = num + STBSP__NUMSZ;
+         dp = 0;
+         // clear tail, and clear leading if value is zero
+         tail[0] = 0;
+         if (n64 == 0) {
+            lead[0] = 0;
+            if (pr == 0) {
+               l = 0;
+               cs = 0;
+               goto scopy;
+            }
+         }
+         // convert to string
+         for (;;) {
+            *--s = h[n64 & ((1 << (l >> 8)) - 1)];
+            n64 >>= (l >> 8);
+            if (!((n64) || ((stbsp__int32)((num + STBSP__NUMSZ) - s) < pr)))
+               break;
+            if (fl & STBSP__TRIPLET_COMMA) {
+               ++l;
+               if ((l & 15) == ((l >> 4) & 15)) {
+                  l &= ~15;
+                  *--s = stbsp__comma;
+               }
+            }
+         };
+         // get the tens and the comma pos
+         cs = (stbsp__uint32)((num + STBSP__NUMSZ) - s) + ((((l >> 4) & 15)) << 24);
+         // get the length that we copied
+         l = (stbsp__uint32)((num + STBSP__NUMSZ) - s);
+         // copy it
+         goto scopy;
+
+      case 'u': // unsigned
+      case 'i':
+      case 'd': // integer
+         // get the integer and abs it
+         if (fl & STBSP__INTMAX) {
+            stbsp__int64 i64 = va_arg(va, stbsp__int64);
+            n64 = (stbsp__uint64)i64;
+            if ((f[0] != 'u') && (i64 < 0)) {
+               n64 = (stbsp__uint64)-i64;
+               fl |= STBSP__NEGATIVE;
+            }
+         } else {
+            stbsp__int32 i = va_arg(va, stbsp__int32);
+            n64 = (stbsp__uint32)i;
+            if ((f[0] != 'u') && (i < 0)) {
+               n64 = (stbsp__uint32)-i;
+               fl |= STBSP__NEGATIVE;
+            }
+         }
+
+#ifndef STB_SPRINTF_NOFLOAT
+         if (fl & STBSP__METRIC_SUFFIX) {
+            if (n64 < 1024)
+               pr = 0;
+            else if (pr == -1)
+               pr = 1;
+            fv = (double)(stbsp__int64)n64;
+            goto doafloat;
+         }
+#endif
+
+         // convert to string
+         s = num + STBSP__NUMSZ;
+         l = 0;
+
+         for (;;) {
+            // do in 32-bit chunks (avoid lots of 64-bit divides even with constant denominators)
+            char *o = s - 8;
+            if (n64 >= 100000000) {
+               n = (stbsp__uint32)(n64 % 100000000);
+               n64 /= 100000000;
+            } else {
+               n = (stbsp__uint32)n64;
+               n64 = 0;
+            }
+            if ((fl & STBSP__TRIPLET_COMMA) == 0) {
+               do {
+                  s -= 2;
+                  *(stbsp__uint16 *)s = *(stbsp__uint16 *)&stbsp__digitpair.pair[(n % 100) * 2];
+                  n /= 100;
+               } while (n);
+            }
+            while (n) {
+               if ((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) {
+                  l = 0;
+                  *--s = stbsp__comma;
+                  --o;
+               } else {
+                  *--s = (char)(n % 10) + '0';
+                  n /= 10;
+               }
+            }
+            if (n64 == 0) {
+               if ((s[0] == '0') && (s != (num + STBSP__NUMSZ)))
+                  ++s;
+               break;
+            }
+            while (s != o)
+               if ((fl & STBSP__TRIPLET_COMMA) && (l++ == 3)) {
+                  l = 0;
+                  *--s = stbsp__comma;
+                  --o;
+               } else {
+                  *--s = '0';
+               }
+         }
+
+         tail[0] = 0;
+         stbsp__lead_sign(fl, lead);
+
+         // get the length that we copied
+         l = (stbsp__uint32)((num + STBSP__NUMSZ) - s);
+         if (l == 0) {
+            *--s = '0';
+            l = 1;
+         }
+         cs = l + (3 << 24);
+         if (pr < 0)
+            pr = 0;
+
+      scopy:
+         // get fw=leading/trailing space, pr=leading zeros
+         if (pr < (stbsp__int32)l)
+            pr = l;
+         n = pr + lead[0] + tail[0] + tz;
+         if (fw < (stbsp__int32)n)
+            fw = n;
+         fw -= n;
+         pr -= l;
+
+         // handle right justify and leading zeros
+         if ((fl & STBSP__LEFTJUST) == 0) {
+            if (fl & STBSP__LEADINGZERO) // if leading zeros, everything is in pr
+            {
+               pr = (fw > pr) ? fw : pr;
+               fw = 0;
+            } else {
+               fl &= ~STBSP__TRIPLET_COMMA; // if no leading zeros, then no commas
+            }
+         }
+
+         // copy the spaces and/or zeros
+         if (fw + pr) {
+            stbsp__int32 i;
+            stbsp__uint32 c;
+
+            // copy leading spaces (or when doing %8.4d stuff)
+            if ((fl & STBSP__LEFTJUST) == 0)
+               while (fw > 0) {
+                  stbsp__cb_buf_clamp(i, fw);
+                  fw -= i;
+                  while (i) {
+                     if ((((stbsp__uintptr)bf) & 3) == 0)
+                        break;
+                     *bf++ = ' ';
+                     --i;
+                  }
+                  while (i >= 4) {
+                     *(stbsp__uint32 *)bf = 0x20202020;
+                     bf += 4;
+                     i -= 4;
+                  }
+                  while (i) {
+                     *bf++ = ' ';
+                     --i;
+                  }
+                  stbsp__chk_cb_buf(1);
+               }
+
+            // copy leader
+            sn = lead + 1;
+            while (lead[0]) {
+               stbsp__cb_buf_clamp(i, lead[0]);
+               lead[0] -= (char)i;
+               while (i) {
+                  *bf++ = *sn++;
+                  --i;
+               }
+               stbsp__chk_cb_buf(1);
+            }
+
+            // copy leading zeros
+            c = cs >> 24;
+            cs &= 0xffffff;
+            cs = (fl & STBSP__TRIPLET_COMMA) ? ((stbsp__uint32)(c - ((pr + cs) % (c + 1)))) : 0;
+            while (pr > 0) {
+               stbsp__cb_buf_clamp(i, pr);
+               pr -= i;
+               if ((fl & STBSP__TRIPLET_COMMA) == 0) {
+                  while (i) {
+                     if ((((stbsp__uintptr)bf) & 3) == 0)
+                        break;
+                     *bf++ = '0';
+                     --i;
+                  }
+                  while (i >= 4) {
+                     *(stbsp__uint32 *)bf = 0x30303030;
+                     bf += 4;
+                     i -= 4;
+                  }
+               }
+               while (i) {
+                  if ((fl & STBSP__TRIPLET_COMMA) && (cs++ == c)) {
+                     cs = 0;
+                     *bf++ = stbsp__comma;
+                  } else
+                     *bf++ = '0';
+                  --i;
+               }
+               stbsp__chk_cb_buf(1);
+            }
+         }
+
+         // copy leader if there is still one
+         sn = lead + 1;
+         while (lead[0]) {
+            stbsp__int32 i;
+            stbsp__cb_buf_clamp(i, lead[0]);
+            lead[0] -= (char)i;
+            while (i) {
+               *bf++ = *sn++;
+               --i;
+            }
+            stbsp__chk_cb_buf(1);
+         }
+
+         // copy the string
+         n = l;
+         while (n) {
+            stbsp__int32 i;
+            stbsp__cb_buf_clamp(i, n);
+            n -= i;
+            STBSP__UNALIGNED(while (i >= 4) {
+               *(stbsp__uint32 volatile *)bf = *(stbsp__uint32 volatile *)s;
+               bf += 4;
+               s += 4;
+               i -= 4;
+            })
+            while (i) {
+               *bf++ = *s++;
+               --i;
+            }
+            stbsp__chk_cb_buf(1);
+         }
+
+         // copy trailing zeros
+         while (tz) {
+            stbsp__int32 i;
+            stbsp__cb_buf_clamp(i, tz);
+            tz -= i;
+            while (i) {
+               if ((((stbsp__uintptr)bf) & 3) == 0)
+                  break;
+               *bf++ = '0';
+               --i;
+            }
+            while (i >= 4) {
+               *(stbsp__uint32 *)bf = 0x30303030;
+               bf += 4;
+               i -= 4;
+            }
+            while (i) {
+               *bf++ = '0';
+               --i;
+            }
+            stbsp__chk_cb_buf(1);
+         }
+
+         // copy tail if there is one
+         sn = tail + 1;
+         while (tail[0]) {
+            stbsp__int32 i;
+            stbsp__cb_buf_clamp(i, tail[0]);
+            tail[0] -= (char)i;
+            while (i) {
+               *bf++ = *sn++;
+               --i;
+            }
+            stbsp__chk_cb_buf(1);
+         }
+
+         // handle the left justify
+         if (fl & STBSP__LEFTJUST)
+            if (fw > 0) {
+               while (fw) {
+                  stbsp__int32 i;
+                  stbsp__cb_buf_clamp(i, fw);
+                  fw -= i;
+                  while (i) {
+                     if ((((stbsp__uintptr)bf) & 3) == 0)
+                        break;
+                     *bf++ = ' ';
+                     --i;
+                  }
+                  while (i >= 4) {
+                     *(stbsp__uint32 *)bf = 0x20202020;
+                     bf += 4;
+                     i -= 4;
+                  }
+                  while (i--)
+                     *bf++ = ' ';
+                  stbsp__chk_cb_buf(1);
+               }
+            }
+         break;
+
+      default: // unknown, just copy code
+         s = num + STBSP__NUMSZ - 1;
+         *s = f[0];
+         l = 1;
+         fw = fl = 0;
+         lead[0] = 0;
+         tail[0] = 0;
+         pr = 0;
+         dp = 0;
+         cs = 0;
+         goto scopy;
+      }
+      ++f;
+   }
+endfmt:
+
+   if (!callback)
+      *bf = 0;
+   else
+      stbsp__flush_cb();
+
+done:
+   return tlen + (int)(bf - buf);
+}
+
+// cleanup
+#undef STBSP__LEFTJUST
+#undef STBSP__LEADINGPLUS
+#undef STBSP__LEADINGSPACE
+#undef STBSP__LEADING_0X
+#undef STBSP__LEADINGZERO
+#undef STBSP__INTMAX
+#undef STBSP__TRIPLET_COMMA
+#undef STBSP__NEGATIVE
+#undef STBSP__METRIC_SUFFIX
+#undef STBSP__NUMSZ
+#undef stbsp__chk_cb_bufL
+#undef stbsp__chk_cb_buf
+#undef stbsp__flush_cb
+#undef stbsp__cb_buf_clamp
+
+// ============================================================================
+//   wrapper functions
+
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(sprintf)(char *buf, char const *fmt, ...)
+{
+   int result;
+   va_list va;
+   va_start(va, fmt);
+   result = STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, va);
+   va_end(va);
+   return result;
+}
+
+typedef struct stbsp__context {
+   char *buf;
+   int count;
+   int length;
+   char tmp[STB_SPRINTF_MIN];
+} stbsp__context;
+
+static char *stbsp__clamp_callback(const char *buf, void *user, int len)
+{
+   stbsp__context *c = (stbsp__context *)user;
+   c->length += len;
+
+   if (len > c->count)
+      len = c->count;
+
+   if (len) {
+      if (buf != c->buf) {
+         const char *s, *se;
+         char *d;
+         d = c->buf;
+         s = buf;
+         se = buf + len;
+         do {
+            *d++ = *s++;
+         } while (s < se);
+      }
+      c->buf += len;
+      c->count -= len;
+   }
+
+   if (c->count <= 0)
+      return c->tmp;
+   return (c->count >= STB_SPRINTF_MIN) ? c->buf : c->tmp; // go direct into buffer if you can
+}
+
+static char * stbsp__count_clamp_callback( const char * buf, void * user, int len )
+{
+   stbsp__context * c = (stbsp__context*)user;
+   (void) sizeof(buf);
+
+   c->length += len;
+   return c->tmp; // go direct into buffer if you can
+}
+
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE( vsnprintf )( char * buf, int count, char const * fmt, va_list va )
+{
+   stbsp__context c;
+
+   if ( (count == 0) && !buf )
+   {
+      c.length = 0;
+
+      STB_SPRINTF_DECORATE( vsprintfcb )( stbsp__count_clamp_callback, &c, c.tmp, fmt, va );
+   }
+   else
+   {
+      int l;
+
+      c.buf = buf;
+      c.count = count;
+      c.length = 0;
+
+      STB_SPRINTF_DECORATE( vsprintfcb )( stbsp__clamp_callback, &c, stbsp__clamp_callback(0,&c,0), fmt, va );
+
+      // zero-terminate
+      l = (int)( c.buf - buf );
+      if ( l >= count ) // should never be greater, only equal (or less) than count
+         l = count - 1;
+      buf[l] = 0;
+   }
+
+   return c.length;
+}
+
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(snprintf)(char *buf, int count, char const *fmt, ...)
+{
+   int result;
+   va_list va;
+   va_start(va, fmt);
+
+   result = STB_SPRINTF_DECORATE(vsnprintf)(buf, count, fmt, va);
+   va_end(va);
+
+   return result;
+}
+
+STBSP__PUBLICDEF int STB_SPRINTF_DECORATE(vsprintf)(char *buf, char const *fmt, va_list va)
+{
+   return STB_SPRINTF_DECORATE(vsprintfcb)(0, 0, buf, fmt, va);
+}
+
+// =======================================================================
+//   low level float utility functions
+
+#ifndef STB_SPRINTF_NOFLOAT
+
+// copies d to bits w/ strict aliasing (this compiles to nothing on /Ox)
+#define STBSP__COPYFP(dest, src)                   \
+   {                                               \
+      int cn;                                      \
+      for (cn = 0; cn < 8; cn++)                   \
+         ((char *)&dest)[cn] = ((char *)&src)[cn]; \
+   }
+
+// get float info
+static stbsp__int32 stbsp__real_to_parts(stbsp__int64 *bits, stbsp__int32 *expo, double value)
+{
+   double d;
+   stbsp__int64 b = 0;
+
+   // load value and round at the frac_digits
+   d = value;
+
+   STBSP__COPYFP(b, d);
+
+   *bits = b & ((((stbsp__uint64)1) << 52) - 1);
+   *expo = (stbsp__int32)(((b >> 52) & 2047) - 1023);
+
+   return (stbsp__int32)((stbsp__uint64) b >> 63);
+}
+
+static double const stbsp__bot[23] = {
+   1e+000, 1e+001, 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009, 1e+010, 1e+011,
+   1e+012, 1e+013, 1e+014, 1e+015, 1e+016, 1e+017, 1e+018, 1e+019, 1e+020, 1e+021, 1e+022
+};
+static double const stbsp__negbot[22] = {
+   1e-001, 1e-002, 1e-003, 1e-004, 1e-005, 1e-006, 1e-007, 1e-008, 1e-009, 1e-010, 1e-011,
+   1e-012, 1e-013, 1e-014, 1e-015, 1e-016, 1e-017, 1e-018, 1e-019, 1e-020, 1e-021, 1e-022
+};
+static double const stbsp__negboterr[22] = {
+   -5.551115123125783e-018,  -2.0816681711721684e-019, -2.0816681711721686e-020, -4.7921736023859299e-021, -8.1803053914031305e-022, 4.5251888174113741e-023,
+   4.5251888174113739e-024,  -2.0922560830128471e-025, -6.2281591457779853e-026, -3.6432197315497743e-027, 6.0503030718060191e-028,  2.0113352370744385e-029,
+   -3.0373745563400371e-030, 1.1806906454401013e-032,  -7.7705399876661076e-032, 2.0902213275965398e-033,  -7.1542424054621921e-034, -7.1542424054621926e-035,
+   2.4754073164739869e-036,  5.4846728545790429e-037,  9.2462547772103625e-038,  -4.8596774326570872e-039
+};
+static double const stbsp__top[13] = {
+   1e+023, 1e+046, 1e+069, 1e+092, 1e+115, 1e+138, 1e+161, 1e+184, 1e+207, 1e+230, 1e+253, 1e+276, 1e+299
+};
+static double const stbsp__negtop[13] = {
+   1e-023, 1e-046, 1e-069, 1e-092, 1e-115, 1e-138, 1e-161, 1e-184, 1e-207, 1e-230, 1e-253, 1e-276, 1e-299
+};
+static double const stbsp__toperr[13] = {
+   8388608,
+   6.8601809640529717e+028,
+   -7.253143638152921e+052,
+   -4.3377296974619174e+075,
+   -1.5559416129466825e+098,
+   -3.2841562489204913e+121,
+   -3.7745893248228135e+144,
+   -1.7356668416969134e+167,
+   -3.8893577551088374e+190,
+   -9.9566444326005119e+213,
+   6.3641293062232429e+236,
+   -5.2069140800249813e+259,
+   -5.2504760255204387e+282
+};
+static double const stbsp__negtoperr[13] = {
+   3.9565301985100693e-040,  -2.299904345391321e-063,  3.6506201437945798e-086,  1.1875228833981544e-109,
+   -5.0644902316928607e-132, -6.7156837247865426e-155, -2.812077463003139e-178,  -5.7778912386589953e-201,
+   7.4997100559334532e-224,  -4.6439668915134491e-247, -6.3691100762962136e-270, -9.436808465446358e-293,
+   8.0970921678014997e-317
+};
+
+#if defined(_MSC_VER) && (_MSC_VER <= 1200)
+static stbsp__uint64 const stbsp__powten[20] = {
+   1,
+   10,
+   100,
+   1000,
+   10000,
+   100000,
+   1000000,
+   10000000,
+   100000000,
+   1000000000,
+   10000000000,
+   100000000000,
+   1000000000000,
+   10000000000000,
+   100000000000000,
+   1000000000000000,
+   10000000000000000,
+   100000000000000000,
+   1000000000000000000,
+   10000000000000000000U
+};
+#define stbsp__tento19th ((stbsp__uint64)1000000000000000000)
+#else
+static stbsp__uint64 const stbsp__powten[20] = {
+   1,
+   10,
+   100,
+   1000,
+   10000,
+   100000,
+   1000000,
+   10000000,
+   100000000,
+   1000000000,
+   10000000000ULL,
+   100000000000ULL,
+   1000000000000ULL,
+   10000000000000ULL,
+   100000000000000ULL,
+   1000000000000000ULL,
+   10000000000000000ULL,
+   100000000000000000ULL,
+   1000000000000000000ULL,
+   10000000000000000000ULL
+};
+#define stbsp__tento19th (1000000000000000000ULL)
+#endif
+
+#define stbsp__ddmulthi(oh, ol, xh, yh)                            \
+   {                                                               \
+      double ahi = 0, alo, bhi = 0, blo;                           \
+      stbsp__int64 bt;                                             \
+      oh = xh * yh;                                                \
+      STBSP__COPYFP(bt, xh);                                       \
+      bt &= ((~(stbsp__uint64)0) << 27);                           \
+      STBSP__COPYFP(ahi, bt);                                      \
+      alo = xh - ahi;                                              \
+      STBSP__COPYFP(bt, yh);                                       \
+      bt &= ((~(stbsp__uint64)0) << 27);                           \
+      STBSP__COPYFP(bhi, bt);                                      \
+      blo = yh - bhi;                                              \
+      ol = ((ahi * bhi - oh) + ahi * blo + alo * bhi) + alo * blo; \
+   }
+
+#define stbsp__ddtoS64(ob, xh, xl)          \
+   {                                        \
+      double ahi = 0, alo, vh, t;           \
+      ob = (stbsp__int64)xh;                \
+      vh = (double)ob;                      \
+      ahi = (xh - vh);                      \
+      t = (ahi - xh);                       \
+      alo = (xh - (ahi - t)) - (vh + t);    \
+      ob += (stbsp__int64)(ahi + alo + xl); \
+   }
+
+#define stbsp__ddrenorm(oh, ol) \
+   {                            \
+      double s;                 \
+      s = oh + ol;              \
+      ol = ol - (s - oh);       \
+      oh = s;                   \
+   }
+
+#define stbsp__ddmultlo(oh, ol, xh, xl, yh, yl) ol = ol + (xh * yl + xl * yh);
+
+#define stbsp__ddmultlos(oh, ol, xh, yl) ol = ol + (xh * yl);
+
+static void stbsp__raise_to_power10(double *ohi, double *olo, double d, stbsp__int32 power) // power can be -323 to +350
+{
+   double ph, pl;
+   if ((power >= 0) && (power <= 22)) {
+      stbsp__ddmulthi(ph, pl, d, stbsp__bot[power]);
+   } else {
+      stbsp__int32 e, et, eb;
+      double p2h, p2l;
+
+      e = power;
+      if (power < 0)
+         e = -e;
+      et = (e * 0x2c9) >> 14; /* %23 */
+      if (et > 13)
+         et = 13;
+      eb = e - (et * 23);
+
+      ph = d;
+      pl = 0.0;
+      if (power < 0) {
+         if (eb) {
+            --eb;
+            stbsp__ddmulthi(ph, pl, d, stbsp__negbot[eb]);
+            stbsp__ddmultlos(ph, pl, d, stbsp__negboterr[eb]);
+         }
+         if (et) {
+            stbsp__ddrenorm(ph, pl);
+            --et;
+            stbsp__ddmulthi(p2h, p2l, ph, stbsp__negtop[et]);
+            stbsp__ddmultlo(p2h, p2l, ph, pl, stbsp__negtop[et], stbsp__negtoperr[et]);
+            ph = p2h;
+            pl = p2l;
+         }
+      } else {
+         if (eb) {
+            e = eb;
+            if (eb > 22)
+               eb = 22;
+            e -= eb;
+            stbsp__ddmulthi(ph, pl, d, stbsp__bot[eb]);
+            if (e) {
+               stbsp__ddrenorm(ph, pl);
+               stbsp__ddmulthi(p2h, p2l, ph, stbsp__bot[e]);
+               stbsp__ddmultlos(p2h, p2l, stbsp__bot[e], pl);
+               ph = p2h;
+               pl = p2l;
+            }
+         }
+         if (et) {
+            stbsp__ddrenorm(ph, pl);
+            --et;
+            stbsp__ddmulthi(p2h, p2l, ph, stbsp__top[et]);
+            stbsp__ddmultlo(p2h, p2l, ph, pl, stbsp__top[et], stbsp__toperr[et]);
+            ph = p2h;
+            pl = p2l;
+         }
+      }
+   }
+   stbsp__ddrenorm(ph, pl);
+   *ohi = ph;
+   *olo = pl;
+}
+
+// given a float value, returns the significant bits in bits, and the position of the
+//   decimal point in decimal_pos.  +/-INF and NAN are specified by special values
+//   returned in the decimal_pos parameter.
+// frac_digits is absolute normally, but if you want from first significant digits (got %g and %e), or in 0x80000000
+static stbsp__int32 stbsp__real_to_str(char const **start, stbsp__uint32 *len, char *out, stbsp__int32 *decimal_pos, double value, stbsp__uint32 frac_digits)
+{
+   double d;
+   stbsp__int64 bits = 0;
+   stbsp__int32 expo, e, ng, tens;
+
+   d = value;
+   STBSP__COPYFP(bits, d);
+   expo = (stbsp__int32)((bits >> 52) & 2047);
+   ng = (stbsp__int32)((stbsp__uint64) bits >> 63);
+   if (ng)
+      d = -d;
+
+   if (expo == 2047) // is nan or inf?
+   {
+      *start = (bits & ((((stbsp__uint64)1) << 52) - 1)) ? "NaN" : "Inf";
+      *decimal_pos = STBSP__SPECIAL;
+      *len = 3;
+      return ng;
+   }
+
+   if (expo == 0) // is zero or denormal
+   {
+      if (((stbsp__uint64) bits << 1) == 0) // do zero
+      {
+         *decimal_pos = 1;
+         *start = out;
+         out[0] = '0';
+         *len = 1;
+         return ng;
+      }
+      // find the right expo for denormals
+      {
+         stbsp__int64 v = ((stbsp__uint64)1) << 51;
+         while ((bits & v) == 0) {
+            --expo;
+            v >>= 1;
+         }
+      }
+   }
+
+   // find the decimal exponent as well as the decimal bits of the value
+   {
+      double ph, pl;
+
+      // log10 estimate - very specifically tweaked to hit or undershoot by no more than 1 of log10 of all expos 1..2046
+      tens = expo - 1023;
+      tens = (tens < 0) ? ((tens * 617) / 2048) : (((tens * 1233) / 4096) + 1);
+
+      // move the significant bits into position and stick them into an int
+      stbsp__raise_to_power10(&ph, &pl, d, 18 - tens);
+
+      // get full as much precision from double-double as possible
+      stbsp__ddtoS64(bits, ph, pl);
+
+      // check if we undershot
+      if (((stbsp__uint64)bits) >= stbsp__tento19th)
+         ++tens;
+   }
+
+   // now do the rounding in integer land
+   frac_digits = (frac_digits & 0x80000000) ? ((frac_digits & 0x7ffffff) + 1) : (tens + frac_digits);
+   if ((frac_digits < 24)) {
+      stbsp__uint32 dg = 1;
+      if ((stbsp__uint64)bits >= stbsp__powten[9])
+         dg = 10;
+      while ((stbsp__uint64)bits >= stbsp__powten[dg]) {
+         ++dg;
+         if (dg == 20)
+            goto noround;
+      }
+      if (frac_digits < dg) {
+         stbsp__uint64 r;
+         // add 0.5 at the right position and round
+         e = dg - frac_digits;
+         if ((stbsp__uint32)e >= 24)
+            goto noround;
+         r = stbsp__powten[e];
+         bits = bits + (r / 2);
+         if ((stbsp__uint64)bits >= stbsp__powten[dg])
+            ++tens;
+         bits /= r;
+      }
+   noround:;
+   }
+
+   // kill long trailing runs of zeros
+   if (bits) {
+      stbsp__uint32 n;
+      for (;;) {
+         if (bits <= 0xffffffff)
+            break;
+         if (bits % 1000)
+            goto donez;
+         bits /= 1000;
+      }
+      n = (stbsp__uint32)bits;
+      while ((n % 1000) == 0)
+         n /= 1000;
+      bits = n;
+   donez:;
+   }
+
+   // convert to string
+   out += 64;
+   e = 0;
+   for (;;) {
+      stbsp__uint32 n;
+      char *o = out - 8;
+      // do the conversion in chunks of U32s (avoid most 64-bit divides, worth it, constant denomiators be damned)
+      if (bits >= 100000000) {
+         n = (stbsp__uint32)(bits % 100000000);
+         bits /= 100000000;
+      } else {
+         n = (stbsp__uint32)bits;
+         bits = 0;
+      }
+      while (n) {
+         out -= 2;
+         *(stbsp__uint16 *)out = *(stbsp__uint16 *)&stbsp__digitpair.pair[(n % 100) * 2];
+         n /= 100;
+         e += 2;
+      }
+      if (bits == 0) {
+         if ((e) && (out[0] == '0')) {
+            ++out;
+            --e;
+         }
+         break;
+      }
+      while (out != o) {
+         *--out = '0';
+         ++e;
+      }
+   }
+
+   *decimal_pos = tens;
+   *start = out;
+   *len = e;
+   return ng;
+}
+
+#undef stbsp__ddmulthi
+#undef stbsp__ddrenorm
+#undef stbsp__ddmultlo
+#undef stbsp__ddmultlos
+#undef STBSP__SPECIAL
+#undef STBSP__COPYFP
+
+#endif // STB_SPRINTF_NOFLOAT
+
+// clean up
+#undef stbsp__uint16
+#undef stbsp__uint32
+#undef stbsp__int32
+#undef stbsp__uint64
+#undef stbsp__int64
+#undef STBSP__UNALIGNED
+
+#endif // STB_SPRINTF_IMPLEMENTATION
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
--- /dev/null
+++ b/include-demo/stb_textedit.h
@@ -1,0 +1,1429 @@
+// stb_textedit.h - v1.14  - public domain - Sean Barrett
+// Development of this library was sponsored by RAD Game Tools
+//
+// This C header file implements the guts of a multi-line text-editing
+// widget; you implement display, word-wrapping, and low-level string
+// insertion/deletion, and stb_textedit will map user inputs into
+// insertions & deletions, plus updates to the cursor position,
+// selection state, and undo state.
+//
+// It is intended for use in games and other systems that need to build
+// their own custom widgets and which do not have heavy text-editing
+// requirements (this library is not recommended for use for editing large
+// texts, as its performance does not scale and it has limited undo).
+//
+// Non-trivial behaviors are modelled after Windows text controls.
+//
+//
+// LICENSE
+//
+// See end of file for license information.
+//
+//
+// DEPENDENCIES
+//
+// Uses the C runtime function 'memmove', which you can override
+// by defining STB_TEXTEDIT_memmove before the implementation.
+// Uses no other functions. Performs no runtime allocations.
+//
+//
+// VERSION HISTORY
+//
+//   1.14 (2021-07-11) page up/down, various fixes
+//   1.13 (2019-02-07) fix bug in undo size management
+//   1.12 (2018-01-29) user can change STB_TEXTEDIT_KEYTYPE, fix redo to avoid crash
+//   1.11 (2017-03-03) fix HOME on last line, dragging off single-line textfield
+//   1.10 (2016-10-25) supress warnings about casting away const with -Wcast-qual
+//   1.9  (2016-08-27) customizable move-by-word
+//   1.8  (2016-04-02) better keyboard handling when mouse button is down
+//   1.7  (2015-09-13) change y range handling in case baseline is non-0
+//   1.6  (2015-04-15) allow STB_TEXTEDIT_memmove
+//   1.5  (2014-09-10) add support for secondary keys for OS X
+//   1.4  (2014-08-17) fix signed/unsigned warnings
+//   1.3  (2014-06-19) fix mouse clicking to round to nearest char boundary
+//   1.2  (2014-05-27) fix some RAD types that had crept into the new code
+//   1.1  (2013-12-15) move-by-word (requires STB_TEXTEDIT_IS_SPACE )
+//   1.0  (2012-07-26) improve documentation, initial public release
+//   0.3  (2012-02-24) bugfixes, single-line mode; insert mode
+//   0.2  (2011-11-28) fixes to undo/redo
+//   0.1  (2010-07-08) initial version
+//
+// ADDITIONAL CONTRIBUTORS
+//
+//   Ulf Winklemann: move-by-word in 1.1
+//   Fabian Giesen: secondary key inputs in 1.5
+//   Martins Mozeiko: STB_TEXTEDIT_memmove in 1.6
+//   Louis Schnellbach: page up/down in 1.14
+//
+//   Bugfixes:
+//      Scott Graham
+//      Daniel Keller
+//      Omar Cornut
+//      Dan Thompson
+//
+// USAGE
+//
+// This file behaves differently depending on what symbols you define
+// before including it.
+//
+//
+// Header-file mode:
+//
+//   If you do not define STB_TEXTEDIT_IMPLEMENTATION before including this,
+//   it will operate in "header file" mode. In this mode, it declares a
+//   single public symbol, STB_TexteditState, which encapsulates the current
+//   state of a text widget (except for the string, which you will store
+//   separately).
+//
+//   To compile in this mode, you must define STB_TEXTEDIT_CHARTYPE to a
+//   primitive type that defines a single character (e.g. char, wchar_t, etc).
+//
+//   To save space or increase undo-ability, you can optionally define the
+//   following things that are used by the undo system:
+//
+//      STB_TEXTEDIT_POSITIONTYPE         small int type encoding a valid cursor position
+//      STB_TEXTEDIT_UNDOSTATECOUNT       the number of undo states to allow
+//      STB_TEXTEDIT_UNDOCHARCOUNT        the number of characters to store in the undo buffer
+//
+//   If you don't define these, they are set to permissive types and
+//   moderate sizes. The undo system does no memory allocations, so
+//   it grows STB_TexteditState by the worst-case storage which is (in bytes):
+//
+//        [4 + 3 * sizeof(STB_TEXTEDIT_POSITIONTYPE)] * STB_TEXTEDIT_UNDOSTATECOUNT
+//      +          sizeof(STB_TEXTEDIT_CHARTYPE)      * STB_TEXTEDIT_UNDOCHARCOUNT
+//
+//
+// Implementation mode:
+//
+//   If you define STB_TEXTEDIT_IMPLEMENTATION before including this, it
+//   will compile the implementation of the text edit widget, depending
+//   on a large number of symbols which must be defined before the include.
+//
+//   The implementation is defined only as static functions. You will then
+//   need to provide your own APIs in the same file which will access the
+//   static functions.
+//
+//   The basic concept is that you provide a "string" object which
+//   behaves like an array of characters. stb_textedit uses indices to
+//   refer to positions in the string, implicitly representing positions
+//   in the displayed textedit. This is true for both plain text and
+//   rich text; even with rich text stb_truetype interacts with your
+//   code as if there was an array of all the displayed characters.
+//
+// Symbols that must be the same in header-file and implementation mode:
+//
+//     STB_TEXTEDIT_CHARTYPE             the character type
+//     STB_TEXTEDIT_POSITIONTYPE         small type that is a valid cursor position
+//     STB_TEXTEDIT_UNDOSTATECOUNT       the number of undo states to allow
+//     STB_TEXTEDIT_UNDOCHARCOUNT        the number of characters to store in the undo buffer
+//
+// Symbols you must define for implementation mode:
+//
+//    STB_TEXTEDIT_STRING               the type of object representing a string being edited,
+//                                      typically this is a wrapper object with other data you need
+//
+//    STB_TEXTEDIT_STRINGLEN(obj)       the length of the string (ideally O(1))
+//    STB_TEXTEDIT_LAYOUTROW(&r,obj,n)  returns the results of laying out a line of characters
+//                                        starting from character #n (see discussion below)
+//    STB_TEXTEDIT_GETWIDTH(obj,n,i)    returns the pixel delta from the xpos of the i'th character
+//                                        to the xpos of the i+1'th char for a line of characters
+//                                        starting at character #n (i.e. accounts for kerning
+//                                        with previous char)
+//    STB_TEXTEDIT_KEYTOTEXT(k)         maps a keyboard input to an insertable character
+//                                        (return type is int, -1 means not valid to insert)
+//    STB_TEXTEDIT_GETCHAR(obj,i)       returns the i'th character of obj, 0-based
+//    STB_TEXTEDIT_NEWLINE              the character returned by _GETCHAR() we recognize
+//                                        as manually wordwrapping for end-of-line positioning
+//
+//    STB_TEXTEDIT_DELETECHARS(obj,i,n)      delete n characters starting at i
+//    STB_TEXTEDIT_INSERTCHARS(obj,i,c*,n)   insert n characters at i (pointed to by STB_TEXTEDIT_CHARTYPE*)
+//
+//    STB_TEXTEDIT_K_SHIFT       a power of two that is or'd in to a keyboard input to represent the shift key
+//
+//    STB_TEXTEDIT_K_LEFT        keyboard input to move cursor left
+//    STB_TEXTEDIT_K_RIGHT       keyboard input to move cursor right
+//    STB_TEXTEDIT_K_UP          keyboard input to move cursor up
+//    STB_TEXTEDIT_K_DOWN        keyboard input to move cursor down
+//    STB_TEXTEDIT_K_PGUP        keyboard input to move cursor up a page
+//    STB_TEXTEDIT_K_PGDOWN      keyboard input to move cursor down a page
+//    STB_TEXTEDIT_K_LINESTART   keyboard input to move cursor to start of line  // e.g. HOME
+//    STB_TEXTEDIT_K_LINEEND     keyboard input to move cursor to end of line    // e.g. END
+//    STB_TEXTEDIT_K_TEXTSTART   keyboard input to move cursor to start of text  // e.g. ctrl-HOME
+//    STB_TEXTEDIT_K_TEXTEND     keyboard input to move cursor to end of text    // e.g. ctrl-END
+//    STB_TEXTEDIT_K_DELETE      keyboard input to delete selection or character under cursor
+//    STB_TEXTEDIT_K_BACKSPACE   keyboard input to delete selection or character left of cursor
+//    STB_TEXTEDIT_K_UNDO        keyboard input to perform undo
+//    STB_TEXTEDIT_K_REDO        keyboard input to perform redo
+//
+// Optional:
+//    STB_TEXTEDIT_K_INSERT              keyboard input to toggle insert mode
+//    STB_TEXTEDIT_IS_SPACE(ch)          true if character is whitespace (e.g. 'isspace'),
+//                                          required for default WORDLEFT/WORDRIGHT handlers
+//    STB_TEXTEDIT_MOVEWORDLEFT(obj,i)   custom handler for WORDLEFT, returns index to move cursor to
+//    STB_TEXTEDIT_MOVEWORDRIGHT(obj,i)  custom handler for WORDRIGHT, returns index to move cursor to
+//    STB_TEXTEDIT_K_WORDLEFT            keyboard input to move cursor left one word // e.g. ctrl-LEFT
+//    STB_TEXTEDIT_K_WORDRIGHT           keyboard input to move cursor right one word // e.g. ctrl-RIGHT
+//    STB_TEXTEDIT_K_LINESTART2          secondary keyboard input to move cursor to start of line
+//    STB_TEXTEDIT_K_LINEEND2            secondary keyboard input to move cursor to end of line
+//    STB_TEXTEDIT_K_TEXTSTART2          secondary keyboard input to move cursor to start of text
+//    STB_TEXTEDIT_K_TEXTEND2            secondary keyboard input to move cursor to end of text
+//
+// Keyboard input must be encoded as a single integer value; e.g. a character code
+// and some bitflags that represent shift states. to simplify the interface, SHIFT must
+// be a bitflag, so we can test the shifted state of cursor movements to allow selection,
+// i.e. (STB_TEXTEDIT_K_RIGHT|STB_TEXTEDIT_K_SHIFT) should be shifted right-arrow.
+//
+// You can encode other things, such as CONTROL or ALT, in additional bits, and
+// then test for their presence in e.g. STB_TEXTEDIT_K_WORDLEFT. For example,
+// my Windows implementations add an additional CONTROL bit, and an additional KEYDOWN
+// bit. Then all of the STB_TEXTEDIT_K_ values bitwise-or in the KEYDOWN bit,
+// and I pass both WM_KEYDOWN and WM_CHAR events to the "key" function in the
+// API below. The control keys will only match WM_KEYDOWN events because of the
+// keydown bit I add, and STB_TEXTEDIT_KEYTOTEXT only tests for the KEYDOWN
+// bit so it only decodes WM_CHAR events.
+//
+// STB_TEXTEDIT_LAYOUTROW returns information about the shape of one displayed
+// row of characters assuming they start on the i'th character--the width and
+// the height and the number of characters consumed. This allows this library
+// to traverse the entire layout incrementally. You need to compute word-wrapping
+// here.
+//
+// Each textfield keeps its own insert mode state, which is not how normal
+// applications work. To keep an app-wide insert mode, update/copy the
+// "insert_mode" field of STB_TexteditState before/after calling API functions.
+//
+// API
+//
+//    void stb_textedit_initialize_state(STB_TexteditState *state, int is_single_line)
+//
+//    void stb_textedit_click(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, float x, float y)
+//    void stb_textedit_drag(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, float x, float y)
+//    int  stb_textedit_cut(STB_TEXTEDIT_STRING *str, STB_TexteditState *state)
+//    int  stb_textedit_paste(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, STB_TEXTEDIT_CHARTYPE *text, int len)
+//    void stb_textedit_key(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, STB_TEXEDIT_KEYTYPE key)
+//
+//    Each of these functions potentially updates the string and updates the
+//    state.
+//
+//      initialize_state:
+//          set the textedit state to a known good default state when initially
+//          constructing the textedit.
+//
+//      click:
+//          call this with the mouse x,y on a mouse down; it will update the cursor
+//          and reset the selection start/end to the cursor point. the x,y must
+//          be relative to the text widget, with (0,0) being the top left.
+//
+//      drag:
+//          call this with the mouse x,y on a mouse drag/up; it will update the
+//          cursor and the selection end point
+//
+//      cut:
+//          call this to delete the current selection; returns true if there was
+//          one. you should FIRST copy the current selection to the system paste buffer.
+//          (To copy, just copy the current selection out of the string yourself.)
+//
+//      paste:
+//          call this to paste text at the current cursor point or over the current
+//          selection if there is one.
+//
+//      key:
+//          call this for keyboard inputs sent to the textfield. you can use it
+//          for "key down" events or for "translated" key events. if you need to
+//          do both (as in Win32), or distinguish Unicode characters from control
+//          inputs, set a high bit to distinguish the two; then you can define the
+//          various definitions like STB_TEXTEDIT_K_LEFT have the is-key-event bit
+//          set, and make STB_TEXTEDIT_KEYTOCHAR check that the is-key-event bit is
+//          clear. STB_TEXTEDIT_KEYTYPE defaults to int, but you can #define it to
+//          anything other type you wante before including.
+//
+//
+//   When rendering, you can read the cursor position and selection state from
+//   the STB_TexteditState.
+//
+//
+// Notes:
+//
+// This is designed to be usable in IMGUI, so it allows for the possibility of
+// running in an IMGUI that has NOT cached the multi-line layout. For this
+// reason, it provides an interface that is compatible with computing the
+// layout incrementally--we try to make sure we make as few passes through
+// as possible. (For example, to locate the mouse pointer in the text, we
+// could define functions that return the X and Y positions of characters
+// and binary search Y and then X, but if we're doing dynamic layout this
+// will run the layout algorithm many times, so instead we manually search
+// forward in one pass. Similar logic applies to e.g. up-arrow and
+// down-arrow movement.)
+//
+// If it's run in a widget that *has* cached the layout, then this is less
+// efficient, but it's not horrible on modern computers. But you wouldn't
+// want to edit million-line files with it.
+
+
+////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////
+////
+////   Header-file mode
+////
+////
+
+#ifndef INCLUDE_STB_TEXTEDIT_H
+#define INCLUDE_STB_TEXTEDIT_H
+
+////////////////////////////////////////////////////////////////////////
+//
+//     STB_TexteditState
+//
+// Definition of STB_TexteditState which you should store
+// per-textfield; it includes cursor position, selection state,
+// and undo state.
+//
+
+#ifndef STB_TEXTEDIT_UNDOSTATECOUNT
+#define STB_TEXTEDIT_UNDOSTATECOUNT   99
+#endif
+#ifndef STB_TEXTEDIT_UNDOCHARCOUNT
+#define STB_TEXTEDIT_UNDOCHARCOUNT   999
+#endif
+#ifndef STB_TEXTEDIT_CHARTYPE
+#define STB_TEXTEDIT_CHARTYPE        int
+#endif
+#ifndef STB_TEXTEDIT_POSITIONTYPE
+#define STB_TEXTEDIT_POSITIONTYPE    int
+#endif
+
+typedef struct
+{
+   // private data
+   STB_TEXTEDIT_POSITIONTYPE  where;
+   STB_TEXTEDIT_POSITIONTYPE  insert_length;
+   STB_TEXTEDIT_POSITIONTYPE  delete_length;
+   int                        char_storage;
+} StbUndoRecord;
+
+typedef struct
+{
+   // private data
+   StbUndoRecord          undo_rec [STB_TEXTEDIT_UNDOSTATECOUNT];
+   STB_TEXTEDIT_CHARTYPE  undo_char[STB_TEXTEDIT_UNDOCHARCOUNT];
+   short undo_point, redo_point;
+   int undo_char_point, redo_char_point;
+} StbUndoState;
+
+typedef struct
+{
+   /////////////////////
+   //
+   // public data
+   //
+
+   int cursor;
+   // position of the text cursor within the string
+
+   int select_start;          // selection start point
+   int select_end;
+   // selection start and end point in characters; if equal, no selection.
+   // note that start may be less than or greater than end (e.g. when
+   // dragging the mouse, start is where the initial click was, and you
+   // can drag in either direction)
+
+   unsigned char insert_mode;
+   // each textfield keeps its own insert mode state. to keep an app-wide
+   // insert mode, copy this value in/out of the app state
+
+   int row_count_per_page;
+   // page size in number of row.
+   // this value MUST be set to >0 for pageup or pagedown in multilines documents.
+
+   /////////////////////
+   //
+   // private data
+   //
+   unsigned char cursor_at_end_of_line; // not implemented yet
+   unsigned char initialized;
+   unsigned char has_preferred_x;
+   unsigned char single_line;
+   unsigned char padding1, padding2, padding3;
+   float preferred_x; // this determines where the cursor up/down tries to seek to along x
+   StbUndoState undostate;
+} STB_TexteditState;
+
+
+////////////////////////////////////////////////////////////////////////
+//
+//     StbTexteditRow
+//
+// Result of layout query, used by stb_textedit to determine where
+// the text in each row is.
+
+// result of layout query
+typedef struct
+{
+   float x0,x1;             // starting x location, end x location (allows for align=right, etc)
+   float baseline_y_delta;  // position of baseline relative to previous row's baseline
+   float ymin,ymax;         // height of row above and below baseline
+   int num_chars;
+} StbTexteditRow;
+#endif //INCLUDE_STB_TEXTEDIT_H
+
+
+////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////
+////
+////   Implementation mode
+////
+////
+
+
+// implementation isn't include-guarded, since it might have indirectly
+// included just the "header" portion
+#ifdef STB_TEXTEDIT_IMPLEMENTATION
+
+#ifndef STB_TEXTEDIT_memmove
+#include <string.h>
+#define STB_TEXTEDIT_memmove memmove
+#endif
+
+
+/////////////////////////////////////////////////////////////////////////////
+//
+//      Mouse input handling
+//
+
+// traverse the layout to locate the nearest character to a display position
+static int stb_text_locate_coord(STB_TEXTEDIT_STRING *str, float x, float y)
+{
+   StbTexteditRow r;
+   int n = STB_TEXTEDIT_STRINGLEN(str);
+   float base_y = 0, prev_x;
+   int i=0, k;
+
+   r.x0 = r.x1 = 0;
+   r.ymin = r.ymax = 0;
+   r.num_chars = 0;
+
+   // search rows to find one that straddles 'y'
+   while (i < n) {
+      STB_TEXTEDIT_LAYOUTROW(&r, str, i);
+      if (r.num_chars <= 0)
+         return n;
+
+      if (i==0 && y < base_y + r.ymin)
+         return 0;
+
+      if (y < base_y + r.ymax)
+         break;
+
+      i += r.num_chars;
+      base_y += r.baseline_y_delta;
+   }
+
+   // below all text, return 'after' last character
+   if (i >= n)
+      return n;
+
+   // check if it's before the beginning of the line
+   if (x < r.x0)
+      return i;
+
+   // check if it's before the end of the line
+   if (x < r.x1) {
+      // search characters in row for one that straddles 'x'
+      prev_x = r.x0;
+      for (k=0; k < r.num_chars; ++k) {
+         float w = STB_TEXTEDIT_GETWIDTH(str, i, k);
+         if (x < prev_x+w) {
+            if (x < prev_x+w/2)
+               return k+i;
+            else
+               return k+i+1;
+         }
+         prev_x += w;
+      }
+      // shouldn't happen, but if it does, fall through to end-of-line case
+   }
+
+   // if the last character is a newline, return that. otherwise return 'after' the last character
+   if (STB_TEXTEDIT_GETCHAR(str, i+r.num_chars-1) == STB_TEXTEDIT_NEWLINE)
+      return i+r.num_chars-1;
+   else
+      return i+r.num_chars;
+}
+
+// API click: on mouse down, move the cursor to the clicked location, and reset the selection
+static void stb_textedit_click(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, float x, float y)
+{
+   // In single-line mode, just always make y = 0. This lets the drag keep working if the mouse
+   // goes off the top or bottom of the text
+   if( state->single_line )
+   {
+      StbTexteditRow r;
+      STB_TEXTEDIT_LAYOUTROW(&r, str, 0);
+      y = r.ymin;
+   }
+
+   state->cursor = stb_text_locate_coord(str, x, y);
+   state->select_start = state->cursor;
+   state->select_end = state->cursor;
+   state->has_preferred_x = 0;
+}
+
+// API drag: on mouse drag, move the cursor and selection endpoint to the clicked location
+static void stb_textedit_drag(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, float x, float y)
+{
+   int p = 0;
+
+   // In single-line mode, just always make y = 0. This lets the drag keep working if the mouse
+   // goes off the top or bottom of the text
+   if( state->single_line )
+   {
+      StbTexteditRow r;
+      STB_TEXTEDIT_LAYOUTROW(&r, str, 0);
+      y = r.ymin;
+   }
+
+   if (state->select_start == state->select_end)
+      state->select_start = state->cursor;
+
+   p = stb_text_locate_coord(str, x, y);
+   state->cursor = state->select_end = p;
+}
+
+/////////////////////////////////////////////////////////////////////////////
+//
+//      Keyboard input handling
+//
+
+// forward declarations
+static void stb_text_undo(STB_TEXTEDIT_STRING *str, STB_TexteditState *state);
+static void stb_text_redo(STB_TEXTEDIT_STRING *str, STB_TexteditState *state);
+static void stb_text_makeundo_delete(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, int where, int length);
+static void stb_text_makeundo_insert(STB_TexteditState *state, int where, int length);
+static void stb_text_makeundo_replace(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, int where, int old_length, int new_length);
+
+typedef struct
+{
+   float x,y;    // position of n'th character
+   float height; // height of line
+   int first_char, length; // first char of row, and length
+   int prev_first;  // first char of previous row
+} StbFindState;
+
+// find the x/y location of a character, and remember info about the previous row in
+// case we get a move-up event (for page up, we'll have to rescan)
+static void stb_textedit_find_charpos(StbFindState *find, STB_TEXTEDIT_STRING *str, int n, int single_line)
+{
+   StbTexteditRow r;
+   int prev_start = 0;
+   int z = STB_TEXTEDIT_STRINGLEN(str);
+   int i=0, first;
+
+   if (n == z) {
+      // if it's at the end, then find the last line -- simpler than trying to
+      // explicitly handle this case in the regular code
+      if (single_line) {
+         STB_TEXTEDIT_LAYOUTROW(&r, str, 0);
+         find->y = 0;
+         find->first_char = 0;
+         find->length = z;
+         find->height = r.ymax - r.ymin;
+         find->x = r.x1;
+      } else {
+         find->y = 0;
+         find->x = 0;
+         find->height = 1;
+         while (i < z) {
+            STB_TEXTEDIT_LAYOUTROW(&r, str, i);
+            prev_start = i;
+            i += r.num_chars;
+         }
+         find->first_char = i;
+         find->length = 0;
+         find->prev_first = prev_start;
+      }
+      return;
+   }
+
+   // search rows to find the one that straddles character n
+   find->y = 0;
+
+   for(;;) {
+      STB_TEXTEDIT_LAYOUTROW(&r, str, i);
+      if (n < i + r.num_chars)
+         break;
+      prev_start = i;
+      i += r.num_chars;
+      find->y += r.baseline_y_delta;
+   }
+
+   find->first_char = first = i;
+   find->length = r.num_chars;
+   find->height = r.ymax - r.ymin;
+   find->prev_first = prev_start;
+
+   // now scan to find xpos
+   find->x = r.x0;
+   for (i=0; first+i < n; ++i)
+      find->x += STB_TEXTEDIT_GETWIDTH(str, first, i);
+}
+
+#define STB_TEXT_HAS_SELECTION(s)   ((s)->select_start != (s)->select_end)
+
+// make the selection/cursor state valid if client altered the string
+static void stb_textedit_clamp(STB_TEXTEDIT_STRING *str, STB_TexteditState *state)
+{
+   int n = STB_TEXTEDIT_STRINGLEN(str);
+   if (STB_TEXT_HAS_SELECTION(state)) {
+      if (state->select_start > n) state->select_start = n;
+      if (state->select_end   > n) state->select_end = n;
+      // if clamping forced them to be equal, move the cursor to match
+      if (state->select_start == state->select_end)
+         state->cursor = state->select_start;
+   }
+   if (state->cursor > n) state->cursor = n;
+}
+
+// delete characters while updating undo
+static void stb_textedit_delete(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, int where, int len)
+{
+   stb_text_makeundo_delete(str, state, where, len);
+   STB_TEXTEDIT_DELETECHARS(str, where, len);
+   state->has_preferred_x = 0;
+}
+
+// delete the section
+static void stb_textedit_delete_selection(STB_TEXTEDIT_STRING *str, STB_TexteditState *state)
+{
+   stb_textedit_clamp(str, state);
+   if (STB_TEXT_HAS_SELECTION(state)) {
+      if (state->select_start < state->select_end) {
+         stb_textedit_delete(str, state, state->select_start, state->select_end - state->select_start);
+         state->select_end = state->cursor = state->select_start;
+      } else {
+         stb_textedit_delete(str, state, state->select_end, state->select_start - state->select_end);
+         state->select_start = state->cursor = state->select_end;
+      }
+      state->has_preferred_x = 0;
+   }
+}
+
+// canoncialize the selection so start <= end
+static void stb_textedit_sortselection(STB_TexteditState *state)
+{
+   if (state->select_end < state->select_start) {
+      int temp = state->select_end;
+      state->select_end = state->select_start;
+      state->select_start = temp;
+   }
+}
+
+// move cursor to first character of selection
+static void stb_textedit_move_to_first(STB_TexteditState *state)
+{
+   if (STB_TEXT_HAS_SELECTION(state)) {
+      stb_textedit_sortselection(state);
+      state->cursor = state->select_start;
+      state->select_end = state->select_start;
+      state->has_preferred_x = 0;
+   }
+}
+
+// move cursor to last character of selection
+static void stb_textedit_move_to_last(STB_TEXTEDIT_STRING *str, STB_TexteditState *state)
+{
+   if (STB_TEXT_HAS_SELECTION(state)) {
+      stb_textedit_sortselection(state);
+      stb_textedit_clamp(str, state);
+      state->cursor = state->select_end;
+      state->select_start = state->select_end;
+      state->has_preferred_x = 0;
+   }
+}
+
+#ifdef STB_TEXTEDIT_IS_SPACE
+static int is_word_boundary( STB_TEXTEDIT_STRING *str, int idx )
+{
+   return idx > 0 ? (STB_TEXTEDIT_IS_SPACE( STB_TEXTEDIT_GETCHAR(str,idx-1) ) && !STB_TEXTEDIT_IS_SPACE( STB_TEXTEDIT_GETCHAR(str, idx) ) ) : 1;
+}
+
+#ifndef STB_TEXTEDIT_MOVEWORDLEFT
+static int stb_textedit_move_to_word_previous( STB_TEXTEDIT_STRING *str, int c )
+{
+   --c; // always move at least one character
+   while( c >= 0 && !is_word_boundary( str, c ) )
+      --c;
+
+   if( c < 0 )
+      c = 0;
+
+   return c;
+}
+#define STB_TEXTEDIT_MOVEWORDLEFT stb_textedit_move_to_word_previous
+#endif
+
+#ifndef STB_TEXTEDIT_MOVEWORDRIGHT
+static int stb_textedit_move_to_word_next( STB_TEXTEDIT_STRING *str, int c )
+{
+   const int len = STB_TEXTEDIT_STRINGLEN(str);
+   ++c; // always move at least one character
+   while( c < len && !is_word_boundary( str, c ) )
+      ++c;
+
+   if( c > len )
+      c = len;
+
+   return c;
+}
+#define STB_TEXTEDIT_MOVEWORDRIGHT stb_textedit_move_to_word_next
+#endif
+
+#endif
+
+// update selection and cursor to match each other
+static void stb_textedit_prep_selection_at_cursor(STB_TexteditState *state)
+{
+   if (!STB_TEXT_HAS_SELECTION(state))
+      state->select_start = state->select_end = state->cursor;
+   else
+      state->cursor = state->select_end;
+}
+
+// API cut: delete selection
+static int stb_textedit_cut(STB_TEXTEDIT_STRING *str, STB_TexteditState *state)
+{
+   if (STB_TEXT_HAS_SELECTION(state)) {
+      stb_textedit_delete_selection(str,state); // implicitly clamps
+      state->has_preferred_x = 0;
+      return 1;
+   }
+   return 0;
+}
+
+// API paste: replace existing selection with passed-in text
+static int stb_textedit_paste_internal(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, STB_TEXTEDIT_CHARTYPE *text, int len)
+{
+   // if there's a selection, the paste should delete it
+   stb_textedit_clamp(str, state);
+   stb_textedit_delete_selection(str,state);
+   // try to insert the characters
+   if (STB_TEXTEDIT_INSERTCHARS(str, state->cursor, text, len)) {
+      stb_text_makeundo_insert(state, state->cursor, len);
+      state->cursor += len;
+      state->has_preferred_x = 0;
+      return 1;
+   }
+   // note: paste failure will leave deleted selection, may be restored with an undo (see https://github.com/nothings/stb/issues/734 for details)
+   return 0;
+}
+
+#ifndef STB_TEXTEDIT_KEYTYPE
+#define STB_TEXTEDIT_KEYTYPE int
+#endif
+
+// API key: process a keyboard input
+static void stb_textedit_key(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, STB_TEXTEDIT_KEYTYPE key)
+{
+retry:
+   switch (key) {
+      default: {
+         int c = STB_TEXTEDIT_KEYTOTEXT(key);
+         if (c > 0) {
+            STB_TEXTEDIT_CHARTYPE ch = (STB_TEXTEDIT_CHARTYPE) c;
+
+            // can't add newline in single-line mode
+            if (c == '\n' && state->single_line)
+               break;
+
+            if (state->insert_mode && !STB_TEXT_HAS_SELECTION(state) && state->cursor < STB_TEXTEDIT_STRINGLEN(str)) {
+               stb_text_makeundo_replace(str, state, state->cursor, 1, 1);
+               STB_TEXTEDIT_DELETECHARS(str, state->cursor, 1);
+               if (STB_TEXTEDIT_INSERTCHARS(str, state->cursor, &ch, 1)) {
+                  ++state->cursor;
+                  state->has_preferred_x = 0;
+               }
+            } else {
+               stb_textedit_delete_selection(str,state); // implicitly clamps
+               if (STB_TEXTEDIT_INSERTCHARS(str, state->cursor, &ch, 1)) {
+                  stb_text_makeundo_insert(state, state->cursor, 1);
+                  ++state->cursor;
+                  state->has_preferred_x = 0;
+               }
+            }
+         }
+         break;
+      }
+
+#ifdef STB_TEXTEDIT_K_INSERT
+      case STB_TEXTEDIT_K_INSERT:
+         state->insert_mode = !state->insert_mode;
+         break;
+#endif
+
+      case STB_TEXTEDIT_K_UNDO:
+         stb_text_undo(str, state);
+         state->has_preferred_x = 0;
+         break;
+
+      case STB_TEXTEDIT_K_REDO:
+         stb_text_redo(str, state);
+         state->has_preferred_x = 0;
+         break;
+
+      case STB_TEXTEDIT_K_LEFT:
+         // if currently there's a selection, move cursor to start of selection
+         if (STB_TEXT_HAS_SELECTION(state))
+            stb_textedit_move_to_first(state);
+         else
+            if (state->cursor > 0)
+               --state->cursor;
+         state->has_preferred_x = 0;
+         break;
+
+      case STB_TEXTEDIT_K_RIGHT:
+         // if currently there's a selection, move cursor to end of selection
+         if (STB_TEXT_HAS_SELECTION(state))
+            stb_textedit_move_to_last(str, state);
+         else
+            ++state->cursor;
+         stb_textedit_clamp(str, state);
+         state->has_preferred_x = 0;
+         break;
+
+      case STB_TEXTEDIT_K_LEFT | STB_TEXTEDIT_K_SHIFT:
+         stb_textedit_clamp(str, state);
+         stb_textedit_prep_selection_at_cursor(state);
+         // move selection left
+         if (state->select_end > 0)
+            --state->select_end;
+         state->cursor = state->select_end;
+         state->has_preferred_x = 0;
+         break;
+
+#ifdef STB_TEXTEDIT_MOVEWORDLEFT
+      case STB_TEXTEDIT_K_WORDLEFT:
+         if (STB_TEXT_HAS_SELECTION(state))
+            stb_textedit_move_to_first(state);
+         else {
+            state->cursor = STB_TEXTEDIT_MOVEWORDLEFT(str, state->cursor);
+            stb_textedit_clamp( str, state );
+         }
+         break;
+
+      case STB_TEXTEDIT_K_WORDLEFT | STB_TEXTEDIT_K_SHIFT:
+         if( !STB_TEXT_HAS_SELECTION( state ) )
+            stb_textedit_prep_selection_at_cursor(state);
+
+         state->cursor = STB_TEXTEDIT_MOVEWORDLEFT(str, state->cursor);
+         state->select_end = state->cursor;
+
+         stb_textedit_clamp( str, state );
+         break;
+#endif
+
+#ifdef STB_TEXTEDIT_MOVEWORDRIGHT
+      case STB_TEXTEDIT_K_WORDRIGHT:
+         if (STB_TEXT_HAS_SELECTION(state))
+            stb_textedit_move_to_last(str, state);
+         else {
+            state->cursor = STB_TEXTEDIT_MOVEWORDRIGHT(str, state->cursor);
+            stb_textedit_clamp( str, state );
+         }
+         break;
+
+      case STB_TEXTEDIT_K_WORDRIGHT | STB_TEXTEDIT_K_SHIFT:
+         if( !STB_TEXT_HAS_SELECTION( state ) )
+            stb_textedit_prep_selection_at_cursor(state);
+
+         state->cursor = STB_TEXTEDIT_MOVEWORDRIGHT(str, state->cursor);
+         state->select_end = state->cursor;
+
+         stb_textedit_clamp( str, state );
+         break;
+#endif
+
+      case STB_TEXTEDIT_K_RIGHT | STB_TEXTEDIT_K_SHIFT:
+         stb_textedit_prep_selection_at_cursor(state);
+         // move selection right
+         ++state->select_end;
+         stb_textedit_clamp(str, state);
+         state->cursor = state->select_end;
+         state->has_preferred_x = 0;
+         break;
+
+      case STB_TEXTEDIT_K_DOWN:
+      case STB_TEXTEDIT_K_DOWN | STB_TEXTEDIT_K_SHIFT:
+      case STB_TEXTEDIT_K_PGDOWN:
+      case STB_TEXTEDIT_K_PGDOWN | STB_TEXTEDIT_K_SHIFT: {
+         StbFindState find;
+         StbTexteditRow row;
+         int i, j, sel = (key & STB_TEXTEDIT_K_SHIFT) != 0;
+         int is_page = (key & ~STB_TEXTEDIT_K_SHIFT) == STB_TEXTEDIT_K_PGDOWN;
+         int row_count = is_page ? state->row_count_per_page : 1;
+
+         if (!is_page && state->single_line) {
+            // on windows, up&down in single-line behave like left&right
+            key = STB_TEXTEDIT_K_RIGHT | (key & STB_TEXTEDIT_K_SHIFT);
+            goto retry;
+         }
+
+         if (sel)
+            stb_textedit_prep_selection_at_cursor(state);
+         else if (STB_TEXT_HAS_SELECTION(state))
+            stb_textedit_move_to_last(str, state);
+
+         // compute current position of cursor point
+         stb_textedit_clamp(str, state);
+         stb_textedit_find_charpos(&find, str, state->cursor, state->single_line);
+
+         for (j = 0; j < row_count; ++j) {
+            float x, goal_x = state->has_preferred_x ? state->preferred_x : find.x;
+            int start = find.first_char + find.length;
+
+            if (find.length == 0)
+               break;
+
+            // now find character position down a row
+            state->cursor = start;
+            STB_TEXTEDIT_LAYOUTROW(&row, str, state->cursor);
+            x = row.x0;
+            for (i=0; i < row.num_chars; ++i) {
+               float dx = STB_TEXTEDIT_GETWIDTH(str, start, i);
+               #ifdef STB_TEXTEDIT_GETWIDTH_NEWLINE
+               if (dx == STB_TEXTEDIT_GETWIDTH_NEWLINE)
+                  break;
+               #endif
+               x += dx;
+               if (x > goal_x)
+                  break;
+               ++state->cursor;
+            }
+            stb_textedit_clamp(str, state);
+
+            state->has_preferred_x = 1;
+            state->preferred_x = goal_x;
+
+            if (sel)
+               state->select_end = state->cursor;
+
+            // go to next line
+            find.first_char = find.first_char + find.length;
+            find.length = row.num_chars;
+         }
+         break;
+      }
+
+      case STB_TEXTEDIT_K_UP:
+      case STB_TEXTEDIT_K_UP | STB_TEXTEDIT_K_SHIFT:
+      case STB_TEXTEDIT_K_PGUP:
+      case STB_TEXTEDIT_K_PGUP | STB_TEXTEDIT_K_SHIFT: {
+         StbFindState find;
+         StbTexteditRow row;
+         int i, j, prev_scan, sel = (key & STB_TEXTEDIT_K_SHIFT) != 0;
+         int is_page = (key & ~STB_TEXTEDIT_K_SHIFT) == STB_TEXTEDIT_K_PGUP;
+         int row_count = is_page ? state->row_count_per_page : 1;
+
+         if (!is_page && state->single_line) {
+            // on windows, up&down become left&right
+            key = STB_TEXTEDIT_K_LEFT | (key & STB_TEXTEDIT_K_SHIFT);
+            goto retry;
+         }
+
+         if (sel)
+            stb_textedit_prep_selection_at_cursor(state);
+         else if (STB_TEXT_HAS_SELECTION(state))
+            stb_textedit_move_to_first(state);
+
+         // compute current position of cursor point
+         stb_textedit_clamp(str, state);
+         stb_textedit_find_charpos(&find, str, state->cursor, state->single_line);
+
+         for (j = 0; j < row_count; ++j) {
+            float  x, goal_x = state->has_preferred_x ? state->preferred_x : find.x;
+
+            // can only go up if there's a previous row
+            if (find.prev_first == find.first_char)
+               break;
+
+            // now find character position up a row
+            state->cursor = find.prev_first;
+            STB_TEXTEDIT_LAYOUTROW(&row, str, state->cursor);
+            x = row.x0;
+            for (i=0; i < row.num_chars; ++i) {
+               float dx = STB_TEXTEDIT_GETWIDTH(str, find.prev_first, i);
+               #ifdef STB_TEXTEDIT_GETWIDTH_NEWLINE
+               if (dx == STB_TEXTEDIT_GETWIDTH_NEWLINE)
+                  break;
+               #endif
+               x += dx;
+               if (x > goal_x)
+                  break;
+               ++state->cursor;
+            }
+            stb_textedit_clamp(str, state);
+
+            state->has_preferred_x = 1;
+            state->preferred_x = goal_x;
+
+            if (sel)
+               state->select_end = state->cursor;
+
+            // go to previous line
+            // (we need to scan previous line the hard way. maybe we could expose this as a new API function?)
+            prev_scan = find.prev_first > 0 ? find.prev_first - 1 : 0;
+            while (prev_scan > 0 && STB_TEXTEDIT_GETCHAR(str, prev_scan - 1) != STB_TEXTEDIT_NEWLINE)
+               --prev_scan;
+            find.first_char = find.prev_first;
+            find.prev_first = prev_scan;
+         }
+         break;
+      }
+
+      case STB_TEXTEDIT_K_DELETE:
+      case STB_TEXTEDIT_K_DELETE | STB_TEXTEDIT_K_SHIFT:
+         if (STB_TEXT_HAS_SELECTION(state))
+            stb_textedit_delete_selection(str, state);
+         else {
+            int n = STB_TEXTEDIT_STRINGLEN(str);
+            if (state->cursor < n)
+               stb_textedit_delete(str, state, state->cursor, 1);
+         }
+         state->has_preferred_x = 0;
+         break;
+
+      case STB_TEXTEDIT_K_BACKSPACE:
+      case STB_TEXTEDIT_K_BACKSPACE | STB_TEXTEDIT_K_SHIFT:
+         if (STB_TEXT_HAS_SELECTION(state))
+            stb_textedit_delete_selection(str, state);
+         else {
+            stb_textedit_clamp(str, state);
+            if (state->cursor > 0) {
+               stb_textedit_delete(str, state, state->cursor-1, 1);
+               --state->cursor;
+            }
+         }
+         state->has_preferred_x = 0;
+         break;
+
+#ifdef STB_TEXTEDIT_K_TEXTSTART2
+      case STB_TEXTEDIT_K_TEXTSTART2:
+#endif
+      case STB_TEXTEDIT_K_TEXTSTART:
+         state->cursor = state->select_start = state->select_end = 0;
+         state->has_preferred_x = 0;
+         break;
+
+#ifdef STB_TEXTEDIT_K_TEXTEND2
+      case STB_TEXTEDIT_K_TEXTEND2:
+#endif
+      case STB_TEXTEDIT_K_TEXTEND:
+         state->cursor = STB_TEXTEDIT_STRINGLEN(str);
+         state->select_start = state->select_end = 0;
+         state->has_preferred_x = 0;
+         break;
+
+#ifdef STB_TEXTEDIT_K_TEXTSTART2
+      case STB_TEXTEDIT_K_TEXTSTART2 | STB_TEXTEDIT_K_SHIFT:
+#endif
+      case STB_TEXTEDIT_K_TEXTSTART | STB_TEXTEDIT_K_SHIFT:
+         stb_textedit_prep_selection_at_cursor(state);
+         state->cursor = state->select_end = 0;
+         state->has_preferred_x = 0;
+         break;
+
+#ifdef STB_TEXTEDIT_K_TEXTEND2
+      case STB_TEXTEDIT_K_TEXTEND2 | STB_TEXTEDIT_K_SHIFT:
+#endif
+      case STB_TEXTEDIT_K_TEXTEND | STB_TEXTEDIT_K_SHIFT:
+         stb_textedit_prep_selection_at_cursor(state);
+         state->cursor = state->select_end = STB_TEXTEDIT_STRINGLEN(str);
+         state->has_preferred_x = 0;
+         break;
+
+
+#ifdef STB_TEXTEDIT_K_LINESTART2
+      case STB_TEXTEDIT_K_LINESTART2:
+#endif
+      case STB_TEXTEDIT_K_LINESTART:
+         stb_textedit_clamp(str, state);
+         stb_textedit_move_to_first(state);
+         if (state->single_line)
+            state->cursor = 0;
+         else while (state->cursor > 0 && STB_TEXTEDIT_GETCHAR(str, state->cursor-1) != STB_TEXTEDIT_NEWLINE)
+            --state->cursor;
+         state->has_preferred_x = 0;
+         break;
+
+#ifdef STB_TEXTEDIT_K_LINEEND2
+      case STB_TEXTEDIT_K_LINEEND2:
+#endif
+      case STB_TEXTEDIT_K_LINEEND: {
+         int n = STB_TEXTEDIT_STRINGLEN(str);
+         stb_textedit_clamp(str, state);
+         stb_textedit_move_to_first(state);
+         if (state->single_line)
+             state->cursor = n;
+         else while (state->cursor < n && STB_TEXTEDIT_GETCHAR(str, state->cursor) != STB_TEXTEDIT_NEWLINE)
+             ++state->cursor;
+         state->has_preferred_x = 0;
+         break;
+      }
+
+#ifdef STB_TEXTEDIT_K_LINESTART2
+      case STB_TEXTEDIT_K_LINESTART2 | STB_TEXTEDIT_K_SHIFT:
+#endif
+      case STB_TEXTEDIT_K_LINESTART | STB_TEXTEDIT_K_SHIFT:
+         stb_textedit_clamp(str, state);
+         stb_textedit_prep_selection_at_cursor(state);
+         if (state->single_line)
+            state->cursor = 0;
+         else while (state->cursor > 0 && STB_TEXTEDIT_GETCHAR(str, state->cursor-1) != STB_TEXTEDIT_NEWLINE)
+            --state->cursor;
+         state->select_end = state->cursor;
+         state->has_preferred_x = 0;
+         break;
+
+#ifdef STB_TEXTEDIT_K_LINEEND2
+      case STB_TEXTEDIT_K_LINEEND2 | STB_TEXTEDIT_K_SHIFT:
+#endif
+      case STB_TEXTEDIT_K_LINEEND | STB_TEXTEDIT_K_SHIFT: {
+         int n = STB_TEXTEDIT_STRINGLEN(str);
+         stb_textedit_clamp(str, state);
+         stb_textedit_prep_selection_at_cursor(state);
+         if (state->single_line)
+             state->cursor = n;
+         else while (state->cursor < n && STB_TEXTEDIT_GETCHAR(str, state->cursor) != STB_TEXTEDIT_NEWLINE)
+            ++state->cursor;
+         state->select_end = state->cursor;
+         state->has_preferred_x = 0;
+         break;
+      }
+   }
+}
+
+/////////////////////////////////////////////////////////////////////////////
+//
+//      Undo processing
+//
+// @OPTIMIZE: the undo/redo buffer should be circular
+
+static void stb_textedit_flush_redo(StbUndoState *state)
+{
+   state->redo_point = STB_TEXTEDIT_UNDOSTATECOUNT;
+   state->redo_char_point = STB_TEXTEDIT_UNDOCHARCOUNT;
+}
+
+// discard the oldest entry in the undo list
+static void stb_textedit_discard_undo(StbUndoState *state)
+{
+   if (state->undo_point > 0) {
+      // if the 0th undo state has characters, clean those up
+      if (state->undo_rec[0].char_storage >= 0) {
+         int n = state->undo_rec[0].insert_length, i;
+         // delete n characters from all other records
+         state->undo_char_point -= n;
+         STB_TEXTEDIT_memmove(state->undo_char, state->undo_char + n, (size_t) (state->undo_char_point*sizeof(STB_TEXTEDIT_CHARTYPE)));
+         for (i=0; i < state->undo_point; ++i)
+            if (state->undo_rec[i].char_storage >= 0)
+               state->undo_rec[i].char_storage -= n; // @OPTIMIZE: get rid of char_storage and infer it
+      }
+      --state->undo_point;
+      STB_TEXTEDIT_memmove(state->undo_rec, state->undo_rec+1, (size_t) (state->undo_point*sizeof(state->undo_rec[0])));
+   }
+}
+
+// discard the oldest entry in the redo list--it's bad if this
+// ever happens, but because undo & redo have to store the actual
+// characters in different cases, the redo character buffer can
+// fill up even though the undo buffer didn't
+static void stb_textedit_discard_redo(StbUndoState *state)
+{
+   int k = STB_TEXTEDIT_UNDOSTATECOUNT-1;
+
+   if (state->redo_point <= k) {
+      // if the k'th undo state has characters, clean those up
+      if (state->undo_rec[k].char_storage >= 0) {
+         int n = state->undo_rec[k].insert_length, i;
+         // move the remaining redo character data to the end of the buffer
+         state->redo_char_point += n;
+         STB_TEXTEDIT_memmove(state->undo_char + state->redo_char_point, state->undo_char + state->redo_char_point-n, (size_t) ((STB_TEXTEDIT_UNDOCHARCOUNT - state->redo_char_point)*sizeof(STB_TEXTEDIT_CHARTYPE)));
+         // adjust the position of all the other records to account for above memmove
+         for (i=state->redo_point; i < k; ++i)
+            if (state->undo_rec[i].char_storage >= 0)
+               state->undo_rec[i].char_storage += n;
+      }
+      // now move all the redo records towards the end of the buffer; the first one is at 'redo_point'
+      STB_TEXTEDIT_memmove(state->undo_rec + state->redo_point+1, state->undo_rec + state->redo_point, (size_t) ((STB_TEXTEDIT_UNDOSTATECOUNT - state->redo_point)*sizeof(state->undo_rec[0])));
+      // now move redo_point to point to the new one
+      ++state->redo_point;
+   }
+}
+
+static StbUndoRecord *stb_text_create_undo_record(StbUndoState *state, int numchars)
+{
+   // any time we create a new undo record, we discard redo
+   stb_textedit_flush_redo(state);
+
+   // if we have no free records, we have to make room, by sliding the
+   // existing records down
+   if (state->undo_point == STB_TEXTEDIT_UNDOSTATECOUNT)
+      stb_textedit_discard_undo(state);
+
+   // if the characters to store won't possibly fit in the buffer, we can't undo
+   if (numchars > STB_TEXTEDIT_UNDOCHARCOUNT) {
+      state->undo_point = 0;
+      state->undo_char_point = 0;
+      return NULL;
+   }
+
+   // if we don't have enough free characters in the buffer, we have to make room
+   while (state->undo_char_point + numchars > STB_TEXTEDIT_UNDOCHARCOUNT)
+      stb_textedit_discard_undo(state);
+
+   return &state->undo_rec[state->undo_point++];
+}
+
+static STB_TEXTEDIT_CHARTYPE *stb_text_createundo(StbUndoState *state, int pos, int insert_len, int delete_len)
+{
+   StbUndoRecord *r = stb_text_create_undo_record(state, insert_len);
+   if (r == NULL)
+      return NULL;
+
+   r->where = pos;
+   r->insert_length = (STB_TEXTEDIT_POSITIONTYPE) insert_len;
+   r->delete_length = (STB_TEXTEDIT_POSITIONTYPE) delete_len;
+
+   if (insert_len == 0) {
+      r->char_storage = -1;
+      return NULL;
+   } else {
+      r->char_storage = state->undo_char_point;
+      state->undo_char_point += insert_len;
+      return &state->undo_char[r->char_storage];
+   }
+}
+
+static void stb_text_undo(STB_TEXTEDIT_STRING *str, STB_TexteditState *state)
+{
+   StbUndoState *s = &state->undostate;
+   StbUndoRecord u, *r;
+   if (s->undo_point == 0)
+      return;
+
+   // we need to do two things: apply the undo record, and create a redo record
+   u = s->undo_rec[s->undo_point-1];
+   r = &s->undo_rec[s->redo_point-1];
+   r->char_storage = -1;
+
+   r->insert_length = u.delete_length;
+   r->delete_length = u.insert_length;
+   r->where = u.where;
+
+   if (u.delete_length) {
+      // if the undo record says to delete characters, then the redo record will
+      // need to re-insert the characters that get deleted, so we need to store
+      // them.
+
+      // there are three cases:
+      //    there's enough room to store the characters
+      //    characters stored for *redoing* don't leave room for redo
+      //    characters stored for *undoing* don't leave room for redo
+      // if the last is true, we have to bail
+
+      if (s->undo_char_point + u.delete_length >= STB_TEXTEDIT_UNDOCHARCOUNT) {
+         // the undo records take up too much character space; there's no space to store the redo characters
+         r->insert_length = 0;
+      } else {
+         int i;
+
+         // there's definitely room to store the characters eventually
+         while (s->undo_char_point + u.delete_length > s->redo_char_point) {
+            // should never happen:
+            if (s->redo_point == STB_TEXTEDIT_UNDOSTATECOUNT)
+               return;
+            // there's currently not enough room, so discard a redo record
+            stb_textedit_discard_redo(s);
+         }
+         r = &s->undo_rec[s->redo_point-1];
+
+         r->char_storage = s->redo_char_point - u.delete_length;
+         s->redo_char_point = s->redo_char_point - u.delete_length;
+
+         // now save the characters
+         for (i=0; i < u.delete_length; ++i)
+            s->undo_char[r->char_storage + i] = STB_TEXTEDIT_GETCHAR(str, u.where + i);
+      }
+
+      // now we can carry out the deletion
+      STB_TEXTEDIT_DELETECHARS(str, u.where, u.delete_length);
+   }
+
+   // check type of recorded action:
+   if (u.insert_length) {
+      // easy case: was a deletion, so we need to insert n characters
+      STB_TEXTEDIT_INSERTCHARS(str, u.where, &s->undo_char[u.char_storage], u.insert_length);
+      s->undo_char_point -= u.insert_length;
+   }
+
+   state->cursor = u.where + u.insert_length;
+
+   s->undo_point--;
+   s->redo_point--;
+}
+
+static void stb_text_redo(STB_TEXTEDIT_STRING *str, STB_TexteditState *state)
+{
+   StbUndoState *s = &state->undostate;
+   StbUndoRecord *u, r;
+   if (s->redo_point == STB_TEXTEDIT_UNDOSTATECOUNT)
+      return;
+
+   // we need to do two things: apply the redo record, and create an undo record
+   u = &s->undo_rec[s->undo_point];
+   r = s->undo_rec[s->redo_point];
+
+   // we KNOW there must be room for the undo record, because the redo record
+   // was derived from an undo record
+
+   u->delete_length = r.insert_length;
+   u->insert_length = r.delete_length;
+   u->where = r.where;
+   u->char_storage = -1;
+
+   if (r.delete_length) {
+      // the redo record requires us to delete characters, so the undo record
+      // needs to store the characters
+
+      if (s->undo_char_point + u->insert_length > s->redo_char_point) {
+         u->insert_length = 0;
+         u->delete_length = 0;
+      } else {
+         int i;
+         u->char_storage = s->undo_char_point;
+         s->undo_char_point = s->undo_char_point + u->insert_length;
+
+         // now save the characters
+         for (i=0; i < u->insert_length; ++i)
+            s->undo_char[u->char_storage + i] = STB_TEXTEDIT_GETCHAR(str, u->where + i);
+      }
+
+      STB_TEXTEDIT_DELETECHARS(str, r.where, r.delete_length);
+   }
+
+   if (r.insert_length) {
+      // easy case: need to insert n characters
+      STB_TEXTEDIT_INSERTCHARS(str, r.where, &s->undo_char[r.char_storage], r.insert_length);
+      s->redo_char_point += r.insert_length;
+   }
+
+   state->cursor = r.where + r.insert_length;
+
+   s->undo_point++;
+   s->redo_point++;
+}
+
+static void stb_text_makeundo_insert(STB_TexteditState *state, int where, int length)
+{
+   stb_text_createundo(&state->undostate, where, 0, length);
+}
+
+static void stb_text_makeundo_delete(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, int where, int length)
+{
+   int i;
+   STB_TEXTEDIT_CHARTYPE *p = stb_text_createundo(&state->undostate, where, length, 0);
+   if (p) {
+      for (i=0; i < length; ++i)
+         p[i] = STB_TEXTEDIT_GETCHAR(str, where+i);
+   }
+}
+
+static void stb_text_makeundo_replace(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, int where, int old_length, int new_length)
+{
+   int i;
+   STB_TEXTEDIT_CHARTYPE *p = stb_text_createundo(&state->undostate, where, old_length, new_length);
+   if (p) {
+      for (i=0; i < old_length; ++i)
+         p[i] = STB_TEXTEDIT_GETCHAR(str, where+i);
+   }
+}
+
+// reset the state to default
+static void stb_textedit_clear_state(STB_TexteditState *state, int is_single_line)
+{
+   state->undostate.undo_point = 0;
+   state->undostate.undo_char_point = 0;
+   state->undostate.redo_point = STB_TEXTEDIT_UNDOSTATECOUNT;
+   state->undostate.redo_char_point = STB_TEXTEDIT_UNDOCHARCOUNT;
+   state->select_end = state->select_start = 0;
+   state->cursor = 0;
+   state->has_preferred_x = 0;
+   state->preferred_x = 0;
+   state->cursor_at_end_of_line = 0;
+   state->initialized = 1;
+   state->single_line = (unsigned char) is_single_line;
+   state->insert_mode = 0;
+   state->row_count_per_page = 0;
+}
+
+// API initialize
+static void stb_textedit_initialize_state(STB_TexteditState *state, int is_single_line)
+{
+   stb_textedit_clear_state(state, is_single_line);
+}
+
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
+static int stb_textedit_paste(STB_TEXTEDIT_STRING *str, STB_TexteditState *state, STB_TEXTEDIT_CHARTYPE const *ctext, int len)
+{
+   return stb_textedit_paste_internal(str, state, (STB_TEXTEDIT_CHARTYPE *) ctext, len);
+}
+
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
+
+#endif//STB_TEXTEDIT_IMPLEMENTATION
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
--- /dev/null
+++ b/include-demo/stb_tilemap_editor.h
@@ -1,0 +1,4187 @@
+// stb_tilemap_editor.h - v0.42 - Sean Barrett - http://nothings.org/stb
+// placed in the public domain - not copyrighted - first released 2014-09
+//
+// Embeddable tilemap editor for C/C++
+//
+//
+// TABLE OF CONTENTS
+//    FAQ
+//    How to compile/use the library
+//    Additional configuration macros
+//    API documentation
+//    Info on editing multiple levels
+//    Revision history
+//    Todo
+//    Credits
+//    License
+//
+//
+// FAQ
+//
+//   Q: What counts as a tilemap for this library?
+//
+//   A: An array of rectangles, where each rectangle contains a small
+//      stack of images.
+//
+//   Q: What are the limitations?
+//
+//   A: Maps are limited to 4096x4096 in dimension.
+//      Each map square can only contain a stack of at most 32 images.
+//      A map can only use up to 32768 distinct image tiles.
+//
+//   Q: How do I compile this?
+//
+//   A: You need to #define several symbols before #including it, but only
+//      in one file. This will cause all the function definitions to be
+//      generated in that file. See the "HOW TO COMPILE" section.
+//
+//   Q: What advantages does this have over a standalone editor?
+//
+//   A: For one, you can integrate the editor into your game so you can
+//      flip between editing and testing without even switching windows.
+//      For another, you don't need an XML parser to get at the map data.
+//
+//   Q: Can I live-edit my game maps?
+//
+//   A: Not really, the editor keeps its own map representation.
+//
+//   Q: How do I save and load maps?
+//
+//   A: You have to do this yourself. The editor provides serialization
+//      functions (get & set) for reading and writing the map it holds.
+//      You can choose whatever format you want to store the map to on
+//      disk; you just need to provide functions to convert. (For example,
+//      I actually store the editor's map representation to disk basically
+//      as-is; then I have a single function that converts from the editor
+//      map representation to the game representation, which is used both
+//      to go from editor-to-game and from loaded-map-to-game.)
+//
+//   Q: I want to have tiles change appearance based on what's
+//      adjacent, or other tile-display/substitution trickiness.
+//
+//   A: You can do this when you convert from the editor's map
+//      representation to the game representation, but there's
+//      no way to show this live in the editor.
+//
+//   Q: The editor appears to be put map location (0,0) at the top left?
+//      I want to use a different coordinate system in my game (e.g. y
+//      increasing upwards, or origin at the center).
+//
+//   A: You can do this when you convert from the editor's map
+//      representation to the game representation. (Don't forget to
+//      translate link coordinates as well!)
+//
+//   Q: The editor appears to put pixel (0,0) at the top left? I want
+//      to use a different coordinate system in my game.
+//
+//   A: The editor defines an "editor pixel coordinate system" with
+//      (0,0) at the top left and requires you to display things in
+//      that coordinate system. You can freely remap those coordinates
+//      to anything you want on screen.
+//
+//   Q: How do I scale the user interface?
+//
+//   A: Since you do all the rendering, you can scale up all the rendering
+//      calls that the library makes to you. If you do, (a) you need
+//      to also scale up the mouse coordinates, and (b) you may want
+//      to scale the map display back down so that you're only scaling
+//      the UI and not everything. See the next question.
+//
+//   Q: How do I scale the map display?
+//
+//   A: Use stbte_set_spacing() to change the size that the map is displayed
+//      at. Note that the "callbacks" to draw tiles are used for both drawing
+//      the map and drawing the tile palette, so that callback may need to
+//      draw at two different scales. You should choose the scales to match
+//       You can tell them apart because the
+//      tile palette gets NULL for the property pointer.
+//
+//   Q: How does object editing work?
+//
+//   A: One way to think of this is that in the editor, you're placing
+//      spawners, not objects. Each spawner must be tile-aligned, because
+//      it's only a tile editor. Each tile (stack of layers) gets
+//      an associated set of properties, and it's up to you to
+//      determine what properties should appear for a given tile,
+//      based on e.g. the spawners that are in it.
+//
+//   Q: How are properties themselves handled?
+//
+//   A: All properties, regardless of UI behavior, are internally floats.
+//      Each tile has an array of floats associated with it, which is
+//      passed back to you when drawing the tiles so you can draw
+//      objects appropriately modified by the properties.
+//
+//   Q: What if I want to have two different objects/spawners in
+//      one tile, both of which have their own properties?
+//
+//   A: Make sure STBTE_MAX_PROPERTIES is large enough for the sum of
+//      properties in both objects, and then you have to explicitly
+//      map the property slot #s to the appropriate objects. They'll
+//      still all appear in a single property panel; there's no way
+//      to get multiple panels.
+//
+//   Q: Can I do one-to-many linking?
+//
+//   A: The library only supports one link per tile. However, you
+//      can have multiple tiles all link to a single tile. So, you
+//      can fake one-to-many linking by linking in the reverse
+//      direction.
+//
+//   Q: What if I have two objects in the same tile, and they each
+//      need an independent link? Or I have two kinds of link associated
+//      with a single object?
+//
+//   A: There is no way to do this. (Unless you can reverse one link.)
+//
+//   Q: How does cut & paste interact with object properties & links?
+//
+//   A: Currently the library has no idea which properties or links
+//      are associated with which layers of a tile. So currently, the
+//      library will only copy properties & links if the layer panel
+//      is set to allow all layers to be copied, OR if you set the
+//      "props" in the layer panel to "always". Similarly, you can
+//      set "props" to "none" so it will never copy.
+//
+//   Q: What happens if the library gets a memory allocation failure
+//      while I'm editing? Will I lose my work?
+//
+//   A: The library allocates all editor memory when you create
+//      the tilemap. It allocates a maximally-sized map and a
+//      fixed-size undo buffer (and the fixed-size copy buffer
+//      is static), and never allocates memory while it's running.
+//      So it can't fail due to running out of memory.
+//
+//   Q: What happens if the library crashes while I'm editing? Will
+//      I lose my work?
+//
+//   A: Yes. Save often.
+//
+//
+// HOW TO COMPILE
+//
+//   This header file contains both the header file and the
+//   implementation file in one. To create the implementation,
+//   in one source file define a few symbols first and then
+//   include this header:
+//
+//      #define STB_TILEMAP_EDITOR_IMPLEMENTATION
+//      // this triggers the implementation
+//
+//      void STBTE_DRAW_RECT(int x0, int y0, int x1, int y1, unsigned int color);
+//      // this must draw a filled rectangle (exclusive on right/bottom)
+//      // color = (r<<16)|(g<<8)|(b)
+//
+//      void STBTE_DRAW_TILE(int x0, int y0,
+//                    unsigned short id, int highlight, float *data);
+//      // this draws the tile image identified by 'id' in one of several
+//      // highlight modes (see STBTE_drawmode_* in the header section);
+//      // if 'data' is NULL, it's drawing the tile in the palette; if 'data'
+//      // is not NULL, it's drawing a tile on the map, and that is the data
+//      // associated with that map tile
+//
+//      #include "stb_tilemap_editor.h"
+//
+//   Optionally you can define the following functions before the include;
+//   note these must be macros (but they can just call a function) so
+//   this library can #ifdef to detect if you've defined them:
+//
+//      #define STBTE_PROP_TYPE(int n, short *tiledata, float *params) ...
+//      // Returns the type of the n'th property of a given tile, which
+//      // controls how it is edited. Legal types are:
+//      //     0                    /* no editable property in this slot */
+//      //     STBTE_PROP_int       /* uses a slider to adjust value     */
+//      //     STBTE_PROP_float     /* uses a weird multi-axis control   */
+//      //     STBTE_PROP_bool      /* uses a checkbox to change value   */
+//      // And you can bitwise-OR in the following flags:
+//      //     STBTE_PROP_disabled
+//      // Note that all of these are stored as floats in the param array.
+//      // The integer slider is limited in precision based on the space
+//      // available on screen, so for wide-ranged integers you may want
+//      // to use floats instead.
+//      //
+//      // Since the tiledata is passed to you, you can choose which property
+//      // is bound to that slot based on that data.
+//      //
+//      // Changing the type of a parameter does not cause the underlying
+//      // value to be clamped to the type min/max except when the tile is
+//      // explicitly selected.
+//
+//      #define STBTE_PROP_NAME(int n, short *tiledata, float *params) ...
+//      // these return a string with the name for slot #n in the float
+//      // property list for the tile.
+//
+//      #define STBTE_PROP_MIN(int n, short *tiledata) ...your code here...
+//      #define STBTE_PROP_MAX(int n, short *tiledata) ...your code here...
+//      // These return the allowable range for the property values for
+//      // the specified slot. It is never called for boolean types.
+//
+//      #define STBTE_PROP_FLOAT_SCALE(int n, short *tiledata, float *params)
+//      // This rescales the float control for a given property; by default
+//      // left mouse drags add integers, right mouse drags adds fractions,
+//      // but you can rescale this per-property.
+//
+//      #define STBTE_FLOAT_CONTROL_GRANULARITY       ... value ...
+//      // This returns the number of pixels of mouse motion necessary
+//      // to advance the object float control. Default is 4
+//
+//      #define STBTE_ALLOW_LINK(short *src, float *src_data,  \
+//                               short *dest, float *dest_data) ...your code...
+//      // this returns true or false depending on whether you allow a link
+//      // to be drawn from a tile 'src' to a tile 'dest'. if you don't
+//      // define this, linking will not be supported
+//
+//      #define STBTE_LINK_COLOR(short *src, float *src_data,  \
+//                               short *dest, float *dest_data) ...your code...
+//      // return a color encoded as a 24-bit unsigned integer in the
+//      // form 0xRRGGBB. If you don't define this, default colors will
+//      // be used.
+//
+//
+//      [[ support for those below is not implemented yet ]]
+//
+//      #define STBTE_HITTEST_TILE(x0,y0,id,mx,my)   ...your code here...
+//      // this returns true or false depending on whether the mouse
+//      // pointer at mx,my is over (touching) a tile of type 'id'
+//      // displayed at x0,y0. Normally stb_tilemap_editor just does
+//      // this hittest based on the tile geometry, but if you have
+//      // tiles whose images extend out of the tile, you'll need this.
+//
+// ADDITIONAL CONFIGURATION
+//
+//   The following symbols set static limits which determine how much
+//   memory will be allocated for the editor. You can override them
+//   by making similar definitions, but memory usage will increase.
+//
+//      #define STBTE_MAX_TILEMAP_X      200   // max 4096
+//      #define STBTE_MAX_TILEMAP_Y      200   // max 4096
+//      #define STBTE_MAX_LAYERS         8     // max 32
+//      #define STBTE_MAX_CATEGORIES     100
+//      #define STBTE_UNDO_BUFFER_BYTES  (1 << 24) // 16 MB
+//      #define STBTE_MAX_COPY           90000  // e.g. 300x300
+//      #define STBTE_MAX_PROPERTIES     10     // max properties per tile
+//
+// API
+//
+//   Further documentation appears in the header-file section below.
+//
+// EDITING MULTIPLE LEVELS
+//
+//   You can only have one active editor instance. To switch between multiple
+//   levels, you can either store the levels in your own format and copy them
+//   in and out of the editor format, or you can create multiple stbte_tilemap
+//   objects and switch between them. The latter has the advantage that each
+//   stbte_tilemap keeps its own undo state. (The clipboard is global, so
+//   either approach allows cut&pasting between levels.)
+//
+// REVISION HISTORY
+//   0.42  fix compilation errors
+//   0.41  fix warnings
+//   0.40  fix warning
+//   0.39  fix warning
+//   0.38  fix warning
+//   0.37  fix warning
+//   0.36  minor compiler support
+//   0.35  layername button changes
+//          - layername buttons grow with the layer panel
+//          - fix stbte_create_map being declared as stbte_create
+//          - fix declaration of stbte_create_map
+//   0.30  properties release
+//          - properties panel for editing user-defined "object" properties
+//          - can link each tile to one other tile
+//          - keyboard interface
+//          - fix eraser tool bug (worked in complex cases, failed in simple)
+//          - undo/redo tools have visible disabled state
+//          - tiles on higher layers draw on top of adjacent lower-layer tiles
+//   0.20  erasable release
+//          - eraser tool
+//          - fix bug when pasting into protected layer
+//          - better color scheme
+//          - internal-use color picker
+//   0.10  initial release
+//
+// TODO
+//
+//   Separate scroll state for each category
+//   Implement paint bucket
+//   Support STBTE_HITTEST_TILE above
+//  ?Cancel drags by clicking other button? - may be fixed
+//   Finish support for toolbar at side
+//
+// CREDITS
+//
+//
+//   Main editor & features
+//      Sean Barrett
+//   Additional features:
+//      Josh Huelsman
+//   Bugfixes:
+//      Ryan Whitworth
+//      Eugene Opalev
+//      Rob Loach
+//      github:wernsey
+//
+// LICENSE
+//
+//   See end of file for license information.
+
+
+
+///////////////////////////////////////////////////////////////////////
+//
+//   HEADER SECTION
+
+#ifndef STB_TILEMAP_INCLUDE_STB_TILEMAP_EDITOR_H
+#define STB_TILEMAP_INCLUDE_STB_TILEMAP_EDITOR_H
+
+#ifdef _WIN32
+  #ifndef _CRT_SECURE_NO_WARNINGS
+  #define _CRT_SECURE_NO_WARNINGS
+  #endif
+  #include <stdlib.h>
+  #include <stdio.h>
+#endif
+
+typedef struct stbte_tilemap stbte_tilemap;
+
+// these are the drawmodes used in STBTE_DRAW_TILE
+enum
+{
+   STBTE_drawmode_deemphasize = -1,
+   STBTE_drawmode_normal      =  0,
+   STBTE_drawmode_emphasize   =  1,
+};
+
+// these are the property types
+#define STBTE_PROP_none     0
+#define STBTE_PROP_int      1
+#define STBTE_PROP_float    2
+#define STBTE_PROP_bool     3
+#define STBTE_PROP_disabled 4
+
+////////
+//
+// creation
+//
+
+extern stbte_tilemap *stbte_create_map(int map_x, int map_y, int map_layers, int spacing_x, int spacing_y, int max_tiles);
+// create an editable tilemap
+//   map_x      : dimensions of map horizontally (user can change this in editor), <= STBTE_MAX_TILEMAP_X
+//   map_y      : dimensions of map vertically (user can change this in editor)    <= STBTE_MAX_TILEMAP_Y
+//   map_layers : number of layers to use (fixed), <= STBTE_MAX_LAYERS
+//   spacing_x  : initial horizontal distance between left edges of map tiles in stb_tilemap_editor pixels
+//   spacing_y  : initial vertical distance between top edges of map tiles in stb_tilemap_editor pixels
+//   max_tiles  : maximum number of tiles that can defined
+//
+// If insufficient memory, returns NULL
+
+extern void stbte_define_tile(stbte_tilemap *tm, unsigned short id, unsigned int layermask, const char * category);
+// call this repeatedly for each tile to install the tile definitions into the editable tilemap
+//   tm        : tilemap created by stbte_create_map
+//   id        : unique identifier for each tile, 0 <= id < 32768
+//   layermask : bitmask of which layers tile is allowed on: 1 = layer 0, 255 = layers 0..7
+//               (note that onscreen, the editor numbers the layers from 1 not 0)
+//               layer 0 is the furthest back, layer 1 is just in front of layer 0, etc
+//   category  : which category this tile is grouped in
+
+extern void stbte_set_display(int x0, int y0, int x1, int y1);
+// call this once to set the size; if you resize, call it again
+
+
+/////////
+//
+// every frame
+//
+
+extern void stbte_draw(stbte_tilemap *tm);
+
+extern void stbte_tick(stbte_tilemap *tm, float time_in_seconds_since_last_frame);
+
+////////////
+//
+//  user input
+//
+
+// if you're using SDL, call the next function for SDL_MOUSEMOTION, SDL_MOUSEBUTTONDOWN, SDL_MOUSEBUTTONUP, SDL_MOUSEWHEEL;
+// the transformation lets you scale from SDL mouse coords to stb_tilemap_editor coords
+extern void stbte_mouse_sdl(stbte_tilemap *tm, const void *sdl_event, float xscale, float yscale, int xoffset, int yoffset);
+
+// otherwise, hook these up explicitly:
+extern void stbte_mouse_move(stbte_tilemap *tm, int x, int y, int shifted, int scrollkey);
+extern void stbte_mouse_button(stbte_tilemap *tm, int x, int y, int right, int down, int shifted, int scrollkey);
+extern void stbte_mouse_wheel(stbte_tilemap *tm, int x, int y, int vscroll);
+
+// note: at the moment, mouse wheel events (SDL_MOUSEWHEEL) are ignored.
+
+// for keyboard, define your own mapping from keys to the following actions.
+// this is totally optional, as all features are accessible with the mouse
+enum stbte_action
+{
+   STBTE_tool_select,
+   STBTE_tool_brush,
+   STBTE_tool_erase,
+   STBTE_tool_rectangle,
+   STBTE_tool_eyedropper,
+   STBTE_tool_link,
+   STBTE_act_toggle_grid,
+   STBTE_act_toggle_links,
+   STBTE_act_undo,
+   STBTE_act_redo,
+   STBTE_act_cut,
+   STBTE_act_copy,
+   STBTE_act_paste,
+   STBTE_scroll_left,
+   STBTE_scroll_right,
+   STBTE_scroll_up,
+   STBTE_scroll_down,
+};
+extern void stbte_action(stbte_tilemap *tm, enum stbte_action act);
+
+////////////////
+//
+//  save/load
+//
+//  There is no editor file format. You have to save and load the data yourself
+//  through the following functions. You can also use these functions to get the
+//  data to generate game-formatted levels directly. (But make sure you save
+//  first! You may also want to autosave to a temp file periodically, etc etc.)
+
+#define STBTE_EMPTY    -1
+
+extern void stbte_get_dimensions(stbte_tilemap *tm, int *max_x, int *max_y);
+// get the dimensions of the level, since the user can change them
+
+extern short* stbte_get_tile(stbte_tilemap *tm, int x, int y);
+// returns an array of shorts that is 'map_layers' in length. each short is
+// either one of the tile_id values from define_tile, or STBTE_EMPTY.
+
+extern float *stbte_get_properties(stbte_tilemap *tm, int x, int y);
+// get the property array associated with the tile at x,y. this is an
+// array of floats that is STBTE_MAX_PROPERTIES in length; you have to
+// interpret the slots according to the semantics you've chosen
+
+extern void stbte_get_link(stbte_tilemap *tm, int x, int y, int *destx, int *desty);
+// gets the link associated with the tile at x,y.
+
+extern void stbte_set_dimensions(stbte_tilemap *tm, int max_x, int max_y);
+// set the dimensions of the level, overrides previous stbte_create_map()
+// values or anything the user has changed
+
+extern void stbte_clear_map(stbte_tilemap *tm);
+// clears the map, including the region outside the defined region, so if the
+// user expands the map, they won't see garbage there
+
+extern void stbte_set_tile(stbte_tilemap *tm, int x, int y, int layer, signed short tile);
+// tile is your tile_id from define_tile, or STBTE_EMPTY
+
+extern void stbte_set_property(stbte_tilemap *tm, int x, int y, int n, float val);
+// set the value of the n'th slot of the tile at x,y
+
+extern void stbte_set_link(stbte_tilemap *tm, int x, int y, int destx, int desty);
+// set a link going from x,y to destx,desty. to force no link,
+// use destx=desty=-1
+
+////////
+//
+// optional
+//
+
+extern void stbte_set_background_tile(stbte_tilemap *tm, short id);
+// selects the tile to fill the bottom layer with and used to clear bottom tiles to;
+// should be same ID as
+
+extern void stbte_set_sidewidths(int left, int right);
+// call this once to set the left & right side widths. don't call
+// it again since the user can change it
+
+extern void stbte_set_spacing(stbte_tilemap *tm, int spacing_x, int spacing_y, int palette_spacing_x, int palette_spacing_y);
+// call this to set the spacing of map tiles and the spacing of palette tiles.
+// if you rescale your display, call it again (e.g. you can implement map zooming yourself)
+
+extern void stbte_set_layername(stbte_tilemap *tm, int layer, const char *layername);
+// sets a string name for your layer that shows in the layer selector. note that this
+// makes the layer selector wider. 'layer' is from 0..(map_layers-1)
+
+#endif
+
+#ifdef STB_TILEMAP_EDITOR_IMPLEMENTATION
+
+#ifndef STBTE_ASSERT
+#define STBTE_ASSERT assert
+#include <assert.h>
+#endif
+
+#ifdef _MSC_VER
+#define STBTE__NOTUSED(v)  (void)(v)
+#else
+#define STBTE__NOTUSED(v)  (void)sizeof(v)
+#endif
+
+#ifndef STBTE_MAX_TILEMAP_X
+#define STBTE_MAX_TILEMAP_X      200
+#endif
+
+#ifndef STBTE_MAX_TILEMAP_Y
+#define STBTE_MAX_TILEMAP_Y      200
+#endif
+
+#ifndef STBTE_MAX_LAYERS
+#define STBTE_MAX_LAYERS         8
+#endif
+
+#ifndef STBTE_MAX_CATEGORIES
+#define STBTE_MAX_CATEGORIES     100
+#endif
+
+#ifndef STBTE_MAX_COPY
+#define STBTE_MAX_COPY           65536
+#endif
+
+#ifndef STBTE_UNDO_BUFFER_BYTES
+#define STBTE_UNDO_BUFFER_BYTES  (1 << 24) // 16 MB
+#endif
+
+#ifndef STBTE_PROP_TYPE
+#define STBTE__NO_PROPS
+#define STBTE_PROP_TYPE(n,td,tp)   0
+#endif
+
+#ifndef STBTE_PROP_NAME
+#define STBTE_PROP_NAME(n,td,tp)  ""
+#endif
+
+#ifndef STBTE_MAX_PROPERTIES
+#define STBTE_MAX_PROPERTIES           10
+#endif
+
+#ifndef STBTE_PROP_MIN
+#define STBTE_PROP_MIN(n,td,tp)  0
+#endif
+
+#ifndef STBTE_PROP_MAX
+#define STBTE_PROP_MAX(n,td,tp)  100.0
+#endif
+
+#ifndef STBTE_PROP_FLOAT_SCALE
+#define STBTE_PROP_FLOAT_SCALE(n,td,tp)  1   // default scale size
+#endif
+
+#ifndef STBTE_FLOAT_CONTROL_GRANULARITY
+#define STBTE_FLOAT_CONTROL_GRANULARITY 4
+#endif
+
+
+#define STBTE__UNDO_BUFFER_COUNT  (STBTE_UNDO_BUFFER_BYTES>>1)
+
+#if STBTE_MAX_TILEMAP_X > 4096 || STBTE_MAX_TILEMAP_Y > 4096
+#error "Maximum editable map size is 4096 x 4096"
+#endif
+#if STBTE_MAX_LAYERS > 32
+#error "Maximum layers allowed is 32"
+#endif
+#if STBTE_UNDO_BUFFER_COUNT & (STBTE_UNDO_BUFFER_COUNT-1)
+#error "Undo buffer size must be a power of 2"
+#endif
+
+#if STBTE_MAX_PROPERTIES == 0
+#define STBTE__NO_PROPS
+#endif
+
+#ifdef STBTE__NO_PROPS
+#undef STBTE_MAX_PROPERTIES
+#define STBTE_MAX_PROPERTIES 1  // so we can declare arrays
+#endif
+
+typedef struct
+{
+   short x,y;
+} stbte__link;
+
+enum
+{
+   STBTE__base,
+   STBTE__outline,
+   STBTE__text,
+
+   STBTE__num_color_aspects,
+};
+
+enum
+{
+   STBTE__idle,
+   STBTE__over,
+   STBTE__down,
+   STBTE__over_down,
+   STBTE__selected,
+   STBTE__selected_over,
+   STBTE__disabled,
+   STBTE__num_color_states,
+};
+
+enum
+{
+   STBTE__cexpander,
+   STBTE__ctoolbar,
+   STBTE__ctoolbar_button,
+   STBTE__cpanel,
+   STBTE__cpanel_sider,
+   STBTE__cpanel_sizer,
+   STBTE__cscrollbar,
+   STBTE__cmapsize,
+   STBTE__clayer_button,
+   STBTE__clayer_hide,
+   STBTE__clayer_lock,
+   STBTE__clayer_solo,
+   STBTE__ccategory_button,
+
+   STBTE__num_color_modes,
+};
+
+#ifdef STBTE__COLORPICKER
+static char *stbte__color_names[] =
+{
+   "expander", "toolbar", "tool button", "panel",
+   "panel c1", "panel c2", "scollbar", "map button",
+   "layer", "hide", "lock", "solo",
+   "category",
+};
+#endif // STBTE__COLORPICKER
+
+      // idle,    over,     down,    over&down, selected, sel&over, disabled
+static int stbte__color_table[STBTE__num_color_modes][STBTE__num_color_aspects][STBTE__num_color_states] =
+{
+   {
+      { 0x000000, 0x84987c, 0xdcdca8, 0xdcdca8, 0x40c040, 0x60d060, 0x505050, },
+      { 0xa4b090, 0xe0ec80, 0xffffc0, 0xffffc0, 0x80ff80, 0x80ff80, 0x606060, },
+      { 0xffffff, 0xffffff, 0xffffff, 0xffffff, 0xffffff, 0xffffff, 0x909090, },
+   }, {
+      { 0x808890, 0x606060, 0x606060, 0x606060, 0x606060, 0x606060, 0x606060, },
+      { 0x605860, 0x606060, 0x606060, 0x606060, 0x606060, 0x606060, 0x606060, },
+      { 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, },
+   }, {
+      { 0x3c5068, 0x7088a8, 0x647488, 0x94b4dc, 0x8890c4, 0x9caccc, 0x404040, },
+      { 0x889cb8, 0x889cb8, 0x889cb8, 0x889cb8, 0x84c4e8, 0xacc8ff, 0x0c0c08, },
+      { 0xbcc4cc, 0xffffff, 0xffffff, 0xffffff, 0xffffff, 0xffffff, 0x707074, },
+   }, {
+      { 0x403848, 0x403010, 0x403010, 0x403010, 0x403010, 0x403010, 0x303024, },
+      { 0x68546c, 0xc08040, 0xc08040, 0xc08040, 0xc08040, 0xc08040, 0x605030, },
+      { 0xf4e4ff, 0xffffff, 0xffffff, 0xffffff, 0xffffff, 0xffffff, 0x909090, },
+   }, {
+      { 0xb4b04c, 0xacac60, 0xc0ffc0, 0xc0ffc0, 0x40c040, 0x60d060, 0x505050, },
+      { 0xa0a04c, 0xd0d04c, 0xffff80, 0xffff80, 0x80ff80, 0x80ff80, 0x606060, },
+      { 0xffffff, 0xffffff, 0xffffff, 0xffffff, 0xffffff, 0xffffff, 0x909090, },
+   }, {
+      { 0x40c440, 0x60d060, 0xc0ffc0, 0xc0ffc0, 0x40c040, 0x60d060, 0x505050, },
+      { 0x40c040, 0x80ff80, 0x80ff80, 0x80ff80, 0x80ff80, 0x80ff80, 0x606060, },
+      { 0xffffff, 0xffffff, 0xffffff, 0xffffff, 0xffffff, 0xffffff, 0x909090, },
+   }, {
+      { 0x9090ac, 0xa0a0b8, 0xbcb8cc, 0xbcb8cc, 0x909040, 0x909040, 0x909040, },
+      { 0xa0a0b8, 0xb0b4d0, 0xa0a0b8, 0xa0a0b8, 0xa0a050, 0xa0a050, 0xa0a050, },
+      { 0x808088, 0x808030, 0x808030, 0x808030, 0x808030, 0x808030, 0x808030, },
+   }, {
+      { 0x704c70, 0x885c8c, 0x9c68a4, 0xb870bc, 0xb490bc, 0xb490bc, 0x302828, },
+      { 0x646064, 0xcca8d4, 0xc060c0, 0xa07898, 0xe0b8e0, 0xe0b8e0, 0x403838, },
+      { 0xdccce4, 0xffffff, 0xffffff, 0xffffff, 0xffffff, 0xffffff, 0x909090, },
+   }, {
+      { 0x704c70, 0x885c8c, 0x9c68a4, 0xb870bc, 0xb490bc, 0xb490bc, 0x302828, },
+      { 0xb09cb4, 0xcca8d4, 0xc060c0, 0xa07898, 0xe0b8e0, 0xe0b8e0, 0x403838, },
+      { 0xdccce4, 0xffffff, 0xffffff, 0xffffff, 0xffffff, 0xffffff, 0x909090, },
+   }, {
+      { 0x646494, 0x888cb8, 0xb0b0b0, 0xb0b0cc, 0x9c9cf4, 0x8888b0, 0x50506c, },
+      { 0x9090a4, 0xb0b4d4, 0xb0b0dc, 0xb0b0cc, 0xd0d0fc, 0xd0d4f0, 0x606060, },
+      { 0xb4b4d4, 0xe4e4ff, 0xffffff, 0xffffff, 0xe0e4ff, 0xececff, 0x909090, },
+   }, {
+      { 0x646444, 0x888c64, 0xb0b0b0, 0xb0b088, 0xaca858, 0x88886c, 0x505050, },
+      { 0x88886c, 0xb0b490, 0xb0b0b0, 0xb0b088, 0xd8d898, 0xd0d4b0, 0x606060, },
+      { 0xb4b49c, 0xffffd8, 0xffffff, 0xffffd4, 0xffffdc, 0xffffcc, 0x909090, },
+   }, {
+      { 0x906464, 0xb48c8c, 0xd4b0b0, 0xdcb0b0, 0xff9c9c, 0xc88888, 0x505050, },
+      { 0xb47c80, 0xd4b4b8, 0xc4a8a8, 0xdcb0b0, 0xffc0c0, 0xfce8ec, 0x606060, },
+      { 0xe0b4b4, 0xffdcd8, 0xffd8d4, 0xffe0e4, 0xffece8, 0xffffff, 0x909090, },
+   }, {
+      { 0x403848, 0x403848, 0x403848, 0x886894, 0x7c80c8, 0x7c80c8, 0x302828, },
+      { 0x403848, 0x403848, 0x403848, 0x403848, 0x7c80c8, 0x7c80c8, 0x403838, },
+      { 0xc8c4c8, 0xffffff, 0xffffff, 0xffffff, 0xe8e8ec, 0xffffff, 0x909090, },
+   },
+};
+
+#define STBTE_COLOR_TILEMAP_BACKGROUND      0x000000
+#define STBTE_COLOR_TILEMAP_BORDER          0x203060
+#define STBTE_COLOR_TILEMAP_HIGHLIGHT       0xffffff
+#define STBTE_COLOR_GRID                    0x404040
+#define STBTE_COLOR_SELECTION_OUTLINE1      0xdfdfdf
+#define STBTE_COLOR_SELECTION_OUTLINE2      0x303030
+#define STBTE_COLOR_TILEPALETTE_OUTLINE     0xffffff
+#define STBTE_COLOR_TILEPALETTE_BACKGROUND  0x000000
+
+#ifndef STBTE_LINK_COLOR
+#define STBTE_LINK_COLOR(src,sp,dest,dp)    0x5030ff
+#endif
+
+#ifndef STBTE_LINK_COLOR_DRAWING
+#define STBTE_LINK_COLOR_DRAWING            0xff40ff
+#endif
+
+#ifndef STBTE_LINK_COLOR_DISALLOWED
+#define STBTE_LINK_COLOR_DISALLOWED         0x602060
+#endif
+
+
+// disabled, selected, down, over
+static unsigned char stbte__state_to_index[2][2][2][2] =
+{
+   {
+      { { STBTE__idle    , STBTE__over          }, { STBTE__down    , STBTE__over_down }, },
+      { { STBTE__selected, STBTE__selected_over }, { STBTE__down    , STBTE__over_down }, },
+   },{
+      { { STBTE__disabled, STBTE__disabled      }, { STBTE__disabled, STBTE__disabled  }, },
+      { { STBTE__selected, STBTE__selected_over }, { STBTE__disabled, STBTE__disabled  }, },
+   }
+};
+#define STBTE__INDEX_FOR_STATE(disable,select,down,over) stbte__state_to_index[disable][select][down][over]
+#define STBTE__INDEX_FOR_ID(id,disable,select) STBTE__INDEX_FOR_STATE(disable,select,STBTE__IS_ACTIVE(id),STBTE__IS_HOT(id))
+
+#define STBTE__FONT_HEIGHT    9
+static short stbte__font_offset[95+16];
+static short stbte__fontdata[769] =
+{
+   4,9,6,9,9,9,9,8,9,8,4,9,7,7,7,7,4,2,6,8,6,6,7,3,4,4,8,6,3,6,2,6,6,6,6,6,6,
+   6,6,6,6,6,2,3,5,4,5,6,6,6,6,6,6,6,6,6,6,6,6,7,6,7,7,7,6,7,6,6,6,6,7,7,6,6,
+   6,4,6,4,7,7,3,6,6,5,6,6,5,6,6,4,5,6,4,7,6,6,6,6,6,6,6,6,6,7,6,6,6,5,2,5,8,
+   0,0,0,0,2,253,130,456,156,8,72,184,64,2,125,66,64,160,64,146,511,146,146,
+   511,146,146,511,146,511,257,341,297,341,297,341,257,511,16,56,124,16,16,16,
+   124,56,16,96,144,270,261,262,136,80,48,224,192,160,80,40,22,14,15,3,448,496,
+   496,240,232,20,10,5,2,112,232,452,450,225,113,58,28,63,30,60,200,455,257,
+   257,0,0,0,257,257,455,120,204,132,132,159,14,4,4,14,159,132,132,204,120,8,
+   24,56,120,56,24,8,32,48,56,60,56,48,32,0,0,0,0,111,111,7,7,0,0,7,7,34,127,
+   127,34,34,127,127,34,36,46,107,107,58,18,99,51,24,12,102,99,48,122,79,93,
+   55,114,80,4,7,3,62,127,99,65,65,99,127,62,8,42,62,28,28,62,42,8,8,8,62,62,
+   8,8,128,224,96,8,8,8,8,8,8,96,96,96,48,24,12,6,3,62,127,89,77,127,62,64,66,
+   127,127,64,64,98,115,89,77,71,66,33,97,73,93,119,35,24,28,22,127,127,16,39,
+   103,69,69,125,57,62,127,73,73,121,48,1,1,113,121,15,7,54,127,73,73,127,54,
+   6,79,73,105,63,30,54,54,128,246,118,8,28,54,99,65,20,20,20,20,65,99,54,28,
+   8,2,3,105,109,7,2,30,63,33,45,47,46,124,126,19,19,126,124,127,127,73,73,127,
+   54,62,127,65,65,99,34,127,127,65,99,62,28,127,127,73,73,73,65,127,127,9,9,
+   9,1,62,127,65,73,121,121,127,127,8,8,127,127,65,65,127,127,65,65,32,96,64,
+   64,127,63,127,127,8,28,54,99,65,127,127,64,64,64,64,127,127,6,12,6,127,127,
+   127,127,6,12,24,127,127,62,127,65,65,65,127,62,127,127,9,9,15,6,62,127,65,
+   81,49,127,94,127,127,9,25,127,102,70,79,73,73,121,49,1,1,127,127,1,1,63,127,
+   64,64,127,63,15,31,48,96,48,31,15,127,127,48,24,48,127,127,99,119,28,28,119,
+   99,7,15,120,120,15,7,97,113,89,77,71,67,127,127,65,65,3,6,12,24,48,96,65,
+   65,127,127,8,12,6,3,6,12,8,64,64,64,64,64,64,64,3,7,4,32,116,84,84,124,120,
+   127,127,68,68,124,56,56,124,68,68,68,56,124,68,68,127,127,56,124,84,84,92,
+   24,8,124,126,10,10,56,380,324,324,508,252,127,127,4,4,124,120,72,122,122,
+   64,256,256,256,506,250,126,126,16,56,104,64,66,126,126,64,124,124,24,56,28,
+   124,120,124,124,4,4,124,120,56,124,68,68,124,56,508,508,68,68,124,56,56,124,
+   68,68,508,508,124,124,4,4,12,8,72,92,84,84,116,36,4,4,62,126,68,68,60,124,
+   64,64,124,124,28,60,96,96,60,28,28,124,112,56,112,124,28,68,108,56,56,108,
+   68,284,316,352,320,508,252,68,100,116,92,76,68,8,62,119,65,65,127,127,65,
+   65,119,62,8,16,24,12,12,24,24,12,4,
+};
+
+typedef struct
+{
+   short id;
+   unsigned short category_id;
+   char *category;
+   unsigned int layermask;
+} stbte__tileinfo;
+
+#define MAX_LAYERMASK    (1 << (8*sizeof(unsigned int)))
+
+typedef short stbte__tiledata;
+
+#define STBTE__NO_TILE   -1
+
+enum
+{
+   STBTE__panel_toolbar,
+   STBTE__panel_colorpick,
+   STBTE__panel_info,
+   STBTE__panel_layers,
+   STBTE__panel_props,
+   STBTE__panel_categories,
+   STBTE__panel_tiles,
+
+   STBTE__num_panel,
+};
+
+enum
+{
+   STBTE__side_left,
+   STBTE__side_right,
+   STBTE__side_top,
+   STBTE__side_bottom,
+};
+
+enum
+{
+   STBTE__tool_select,
+   STBTE__tool_brush,
+   STBTE__tool_erase,
+   STBTE__tool_rect,
+   STBTE__tool_eyedrop,
+   STBTE__tool_fill,
+   STBTE__tool_link,
+
+   STBTE__tool_showgrid,
+   STBTE__tool_showlinks,
+
+   STBTE__tool_undo,
+   STBTE__tool_redo,
+   // copy/cut/paste aren't included here because they're displayed differently
+
+   STBTE__num_tool,
+};
+
+// icons are stored in the 0-31 range of ASCII in the font
+static int toolchar[] = { 26,24,25,20,23,22,18, 19,17, 29,28, };
+
+enum
+{
+   STBTE__propmode_default,
+   STBTE__propmode_always,
+   STBTE__propmode_never,
+};
+
+enum
+{
+   STBTE__paint,
+
+   // from here down does hittesting
+   STBTE__tick,
+   STBTE__mousemove,
+   STBTE__mousewheel,
+   STBTE__leftdown,
+   STBTE__leftup,
+   STBTE__rightdown,
+   STBTE__rightup,
+};
+
+typedef struct
+{
+   int expanded, mode;
+   int delta_height;     // number of rows they've requested for this
+   int side;
+   int width,height;
+   int x0,y0;
+} stbte__panel;
+
+typedef struct
+{
+   int x0,y0,x1,y1,color;
+} stbte__colorrect;
+
+#define STBTE__MAX_DELAYRECT 256
+
+typedef struct
+{
+   int tool, active_event;
+   int active_id, hot_id, next_hot_id;
+   int event;
+   int mx,my, dx,dy;
+   int ms_time;
+   int shift, scrollkey;
+   int initted;
+   int side_extended[2];
+   stbte__colorrect delayrect[STBTE__MAX_DELAYRECT];
+   int delaycount;
+   int show_grid, show_links;
+   int brush_state; // used to decide which kind of erasing
+   int eyedrop_x, eyedrop_y, eyedrop_last_layer;
+   int pasting, paste_x, paste_y;
+   int scrolling, start_x, start_y;
+   int last_mouse_x, last_mouse_y;
+   int accum_x, accum_y;
+   int linking;
+   int dragging;
+   int drag_x, drag_y, drag_w, drag_h;
+   int drag_offx, drag_offy, drag_dest_x, drag_dest_y;
+   int undoing;
+   int has_selection, select_x0, select_y0, select_x1, select_y1;
+   int sx,sy;
+   int x0,y0,x1,y1, left_width, right_width; // configurable widths
+   float alert_timer;
+   const char *alert_msg;
+   float dt;
+   stbte__panel panel[STBTE__num_panel];
+   short copybuffer[STBTE_MAX_COPY][STBTE_MAX_LAYERS];
+   float copyprops[STBTE_MAX_COPY][STBTE_MAX_PROPERTIES];
+#ifdef STBTE_ALLOW_LINK
+   stbte__link copylinks[STBTE_MAX_COPY];
+#endif
+   int copy_src_x, copy_src_y;
+   stbte_tilemap *copy_src;
+   int copy_width,copy_height,has_copy,copy_has_props;
+} stbte__ui_t;
+
+// there's only one UI system at a time, so we can globalize this
+static stbte__ui_t stbte__ui = { STBTE__tool_brush, 0 };
+
+#define STBTE__INACTIVE()     (stbte__ui.active_id == 0)
+#define STBTE__IS_ACTIVE(id)  (stbte__ui.active_id == (id))
+#define STBTE__IS_HOT(id)     (stbte__ui.hot_id    == (id))
+
+#define STBTE__BUTTON_HEIGHT            (STBTE__FONT_HEIGHT + 2 * STBTE__BUTTON_INTERNAL_SPACING)
+#define STBTE__BUTTON_INTERNAL_SPACING  (2 + (STBTE__FONT_HEIGHT>>4))
+
+typedef struct
+{
+   const char *name;
+   int locked;
+   int hidden;
+} stbte__layer;
+
+enum
+{
+   STBTE__unlocked,
+   STBTE__protected,
+   STBTE__locked,
+};
+
+struct stbte_tilemap
+{
+    stbte__tiledata data[STBTE_MAX_TILEMAP_Y][STBTE_MAX_TILEMAP_X][STBTE_MAX_LAYERS];
+    float props[STBTE_MAX_TILEMAP_Y][STBTE_MAX_TILEMAP_X][STBTE_MAX_PROPERTIES];
+    #ifdef STBTE_ALLOW_LINK
+    stbte__link link[STBTE_MAX_TILEMAP_Y][STBTE_MAX_TILEMAP_X];
+    int linkcount[STBTE_MAX_TILEMAP_Y][STBTE_MAX_TILEMAP_X];
+    #endif
+    int max_x, max_y, num_layers;
+    int spacing_x, spacing_y;
+    int palette_spacing_x, palette_spacing_y;
+    int scroll_x,scroll_y;
+    int cur_category, cur_tile, cur_layer;
+    char *categories[STBTE_MAX_CATEGORIES];
+    int num_categories, category_scroll;
+    stbte__tileinfo *tiles;
+    int num_tiles, max_tiles, digits;
+    unsigned char undo_available_valid;
+    unsigned char undo_available;
+    unsigned char redo_available;
+    unsigned char padding;
+    int cur_palette_count;
+    int palette_scroll;
+    int tileinfo_dirty;
+    stbte__layer layerinfo[STBTE_MAX_LAYERS];
+    int has_layer_names;
+    int layername_width;
+    int layer_scroll;
+    int propmode;
+    int solo_layer;
+    int undo_pos, undo_len, redo_len;
+    short background_tile;
+    unsigned char id_in_use[32768>>3];
+    short *undo_buffer;
+};
+
+static char *default_category = (char*) "[unassigned]";
+
+static void stbte__init_gui(void)
+{
+   int i,n;
+   stbte__ui.initted = 1;
+   // init UI state
+   stbte__ui.show_links = 1;
+   for (i=0; i < STBTE__num_panel; ++i) {
+      stbte__ui.panel[i].expanded     = 1; // visible if not autohidden
+      stbte__ui.panel[i].delta_height = 0;
+      stbte__ui.panel[i].side         = STBTE__side_left;
+   }
+   stbte__ui.panel[STBTE__panel_toolbar  ].side = STBTE__side_top;
+   stbte__ui.panel[STBTE__panel_colorpick].side = STBTE__side_right;
+
+   if (stbte__ui.left_width == 0)
+      stbte__ui.left_width = 80;
+   if (stbte__ui.right_width == 0)
+      stbte__ui.right_width = 80;
+
+   // init font
+   n=95+16;
+   for (i=0; i < 95+16; ++i) {
+      stbte__font_offset[i] = n;
+      n += stbte__fontdata[i];
+   }
+}
+
+stbte_tilemap *stbte_create_map(int map_x, int map_y, int map_layers, int spacing_x, int spacing_y, int max_tiles)
+{
+   int i;
+   stbte_tilemap *tm;
+   STBTE_ASSERT(map_layers >= 0 && map_layers <= STBTE_MAX_LAYERS);
+   STBTE_ASSERT(map_x >= 0 && map_x <= STBTE_MAX_TILEMAP_X);
+   STBTE_ASSERT(map_y >= 0 && map_y <= STBTE_MAX_TILEMAP_Y);
+   if (map_x < 0 || map_y < 0 || map_layers < 0 ||
+       map_x > STBTE_MAX_TILEMAP_X || map_y > STBTE_MAX_TILEMAP_Y || map_layers > STBTE_MAX_LAYERS)
+      return NULL;
+
+   if (!stbte__ui.initted)
+      stbte__init_gui();
+
+   tm = (stbte_tilemap *) malloc(sizeof(*tm) + sizeof(*tm->tiles) * max_tiles + STBTE_UNDO_BUFFER_BYTES);
+   if (tm == NULL)
+      return NULL;
+
+   tm->tiles = (stbte__tileinfo *) (tm+1);
+   tm->undo_buffer = (short *) (tm->tiles + max_tiles);
+   tm->num_layers = map_layers;
+   tm->max_x = map_x;
+   tm->max_y = map_y;
+   tm->spacing_x = spacing_x;
+   tm->spacing_y = spacing_y;
+   tm->scroll_x = 0;
+   tm->scroll_y = 0;
+   tm->palette_scroll = 0;
+   tm->palette_spacing_x = spacing_x+1;
+   tm->palette_spacing_y = spacing_y+1;
+   tm->cur_category = -1;
+   tm->cur_tile = 0;
+   tm->solo_layer = -1;
+   tm->undo_len = 0;
+   tm->redo_len = 0;
+   tm->undo_pos = 0;
+   tm->category_scroll = 0;
+   tm->layer_scroll = 0;
+   tm->propmode = 0;
+   tm->has_layer_names = 0;
+   tm->layername_width = 0;
+   tm->undo_available_valid = 0;
+
+   for (i=0; i < tm->num_layers; ++i) {
+      tm->layerinfo[i].hidden = 0;
+      tm->layerinfo[i].locked = STBTE__unlocked;
+      tm->layerinfo[i].name   = 0;
+   }
+
+   tm->background_tile = STBTE__NO_TILE;
+   stbte_clear_map(tm);
+
+   tm->max_tiles = max_tiles;
+   tm->num_tiles = 0;
+   for (i=0; i < 32768/8; ++i)
+      tm->id_in_use[i] = 0;
+   tm->tileinfo_dirty = 1;
+   return tm;
+}
+
+void stbte_set_background_tile(stbte_tilemap *tm, short id)
+{
+   int i;
+   STBTE_ASSERT(id >= -1);
+   // STBTE_ASSERT(id < 32768);
+   if (id < -1)
+      return;
+   for (i=0; i < STBTE_MAX_TILEMAP_X * STBTE_MAX_TILEMAP_Y; ++i)
+      if (tm->data[0][i][0] == -1)
+         tm->data[0][i][0] = id;
+   tm->background_tile = id;
+}
+
+void stbte_set_spacing(stbte_tilemap *tm, int spacing_x, int spacing_y, int palette_spacing_x, int palette_spacing_y)
+{
+   tm->spacing_x = spacing_x;
+   tm->spacing_y = spacing_y;
+   tm->palette_spacing_x = palette_spacing_x;
+   tm->palette_spacing_y = palette_spacing_y;
+}
+
+void stbte_set_sidewidths(int left, int right)
+{
+   stbte__ui.left_width  = left;
+   stbte__ui.right_width = right;
+}
+
+void stbte_set_display(int x0, int y0, int x1, int y1)
+{
+   stbte__ui.x0 = x0;
+   stbte__ui.y0 = y0;
+   stbte__ui.x1 = x1;
+   stbte__ui.y1 = y1;
+}
+
+void stbte_define_tile(stbte_tilemap *tm, unsigned short id, unsigned int layermask, const char * category_c)
+{
+   char *category = (char *) category_c;
+   STBTE_ASSERT(id < 32768);
+   STBTE_ASSERT(tm->num_tiles < tm->max_tiles);
+   STBTE_ASSERT((tm->id_in_use[id>>3]&(1<<(id&7))) == 0);
+   if (id >= 32768 || tm->num_tiles >= tm->max_tiles || (tm->id_in_use[id>>3]&(1<<(id&7))))
+      return;
+
+   if (category == NULL)
+      category = (char*) default_category;
+   tm->id_in_use[id>>3] |= 1 << (id&7);
+   tm->tiles[tm->num_tiles].category    = category;
+   tm->tiles[tm->num_tiles].id        = id;
+   tm->tiles[tm->num_tiles].layermask = layermask;
+   ++tm->num_tiles;
+   tm->tileinfo_dirty = 1;
+}
+
+static int stbte__text_width(const char *str);
+
+void stbte_set_layername(stbte_tilemap *tm, int layer, const char *layername)
+{
+   STBTE_ASSERT(layer >= 0 && layer < tm->num_layers);
+   if (layer >= 0 && layer < tm->num_layers) {
+      int width;
+      tm->layerinfo[layer].name = layername;
+      tm->has_layer_names = 1;
+      width = stbte__text_width(layername);
+      tm->layername_width = (width > tm->layername_width ? width : tm->layername_width);
+   }
+}
+
+void stbte_get_dimensions(stbte_tilemap *tm, int *max_x, int *max_y)
+{
+   *max_x = tm->max_x;
+   *max_y = tm->max_y;
+}
+
+short* stbte_get_tile(stbte_tilemap *tm, int x, int y)
+{
+   STBTE_ASSERT(x >= 0 && x < tm->max_x && y >= 0 && y < tm->max_y);
+   if (x < 0 || x >= STBTE_MAX_TILEMAP_X || y < 0 || y >= STBTE_MAX_TILEMAP_Y)
+      return NULL;
+   return tm->data[y][x];
+}
+
+float *stbte_get_properties(stbte_tilemap *tm, int x, int y)
+{
+   STBTE_ASSERT(x >= 0 && x < tm->max_x && y >= 0 && y < tm->max_y);
+   if (x < 0 || x >= STBTE_MAX_TILEMAP_X || y < 0 || y >= STBTE_MAX_TILEMAP_Y)
+      return NULL;
+   return tm->props[y][x];
+}
+
+void stbte_get_link(stbte_tilemap *tm, int x, int y, int *destx, int *desty)
+{
+   int gx=-1,gy=-1;
+   STBTE_ASSERT(x >= 0 && x < tm->max_x && y >= 0 && y < tm->max_y);
+#ifdef STBTE_ALLOW_LINK
+   if (x >= 0 && x < STBTE_MAX_TILEMAP_X && y >= 0 && y < STBTE_MAX_TILEMAP_Y) {
+      gx = tm->link[y][x].x;
+      gy = tm->link[y][x].y;
+      if (gx >= 0)
+         if (!STBTE_ALLOW_LINK(tm->data[y][x], tm->props[y][x], tm->data[gy][gx], tm->props[gy][gx]))
+            gx = gy = -1;
+   }
+#endif
+   *destx = gx;
+   *desty = gy;
+}
+
+void stbte_set_property(stbte_tilemap *tm, int x, int y, int n, float val)
+{
+   tm->props[y][x][n] = val;
+}
+
+#ifdef STBTE_ALLOW_LINK
+static void stbte__set_link(stbte_tilemap *tm, int src_x, int src_y, int dest_x, int dest_y, int undo_mode);
+#endif
+
+enum
+{
+   STBTE__undo_none,
+   STBTE__undo_record,
+   STBTE__undo_block,
+};
+
+void stbte_set_link(stbte_tilemap *tm, int x, int y, int destx, int desty)
+{
+#ifdef STBTE_ALLOW_LINK
+   stbte__set_link(tm, x, y, destx, desty, STBTE__undo_none);
+#else
+   STBTE_ASSERT(0);
+#endif
+}
+
+
+// returns an array of map_layers shorts. each short is either
+// one of the tile_id values from define_tile, or STBTE_EMPTY
+
+void stbte_set_dimensions(stbte_tilemap *tm, int map_x, int map_y)
+{
+   STBTE_ASSERT(map_x >= 0 && map_x <= STBTE_MAX_TILEMAP_X);
+   STBTE_ASSERT(map_y >= 0 && map_y <= STBTE_MAX_TILEMAP_Y);
+   if (map_x < 0 || map_y < 0 || map_x > STBTE_MAX_TILEMAP_X || map_y > STBTE_MAX_TILEMAP_Y)
+      return;
+   tm->max_x = map_x;
+   tm->max_y = map_y;
+}
+
+void stbte_clear_map(stbte_tilemap *tm)
+{
+   int i,j;
+   for (i=0; i < STBTE_MAX_TILEMAP_X * STBTE_MAX_TILEMAP_Y; ++i) {
+      tm->data[0][i][0] = tm->background_tile;
+      for (j=1; j < tm->num_layers; ++j)
+         tm->data[0][i][j] = STBTE__NO_TILE;
+      for (j=0; j < STBTE_MAX_PROPERTIES; ++j)
+         tm->props[0][i][j] = 0;
+      #ifdef STBTE_ALLOW_LINK
+      tm->link[0][i].x = -1;
+      tm->link[0][i].y = -1;
+      tm->linkcount[0][i] = 0;
+      #endif
+   }
+}
+
+void stbte_set_tile(stbte_tilemap *tm, int x, int y, int layer, signed short tile)
+{
+   STBTE_ASSERT(x >= 0 && x < tm->max_x && y >= 0 && y < tm->max_y);
+   STBTE_ASSERT(layer >= 0 && layer < tm->num_layers);
+   STBTE_ASSERT(tile >= -1);
+   //STBTE_ASSERT(tile < 32768);
+   if (x < 0 || x >= STBTE_MAX_TILEMAP_X || y < 0 || y >= STBTE_MAX_TILEMAP_Y)
+      return;
+   if (layer < 0 || layer >= tm->num_layers || tile < -1)
+      return;
+   tm->data[y][x][layer] = tile;
+}
+
+static void stbte__choose_category(stbte_tilemap *tm, int category)
+{
+   int i,n=0;
+   tm->cur_category = category;
+   for (i=0; i < tm->num_tiles; ++i)
+      if (tm->tiles[i].category_id == category || category == -1)
+         ++n;
+   tm->cur_palette_count = n;
+   tm->palette_scroll = 0;
+}
+
+static int stbte__strequal(char *p, char *q)
+{
+   while (*p)
+      if (*p++ != *q++) return 0;
+   return *q == 0;
+}
+
+static void stbte__compute_tileinfo(stbte_tilemap *tm)
+{
+   int i,j;
+
+   tm->num_categories=0;
+
+   for (i=0; i < tm->num_tiles; ++i) {
+      stbte__tileinfo *t = &tm->tiles[i];
+      // find category
+      for (j=0; j < tm->num_categories; ++j)
+         if (stbte__strequal(t->category, tm->categories[j]))
+            goto found;
+      tm->categories[j] = t->category;
+      ++tm->num_categories;
+     found:
+      t->category_id = (unsigned short) j;
+   }
+
+   // currently number of categories can never decrease because you
+   // can't remove tile definitions, but let's get it right anyway
+   if (tm->cur_category > tm->num_categories) {
+      tm->cur_category = -1;
+   }
+
+   stbte__choose_category(tm, tm->cur_category);
+
+   tm->tileinfo_dirty = 0;
+}
+
+static void stbte__prepare_tileinfo(stbte_tilemap *tm)
+{
+   if (tm->tileinfo_dirty)
+      stbte__compute_tileinfo(tm);
+}
+
+
+/////////////////////// undo system ////////////////////////
+
+// the undo system works by storing "commands" into a buffer, and
+// then playing back those commands. undo and redo have to store
+// the commands in different order.
+//
+// the commands are:
+//
+// 1)  end_of_undo_record
+//       -1:short
+//
+// 2)  end_of_redo_record
+//       -2:short
+//
+// 3)  tile update
+//       tile_id:short (-1..32767)
+//       x_coord:short
+//       y_coord:short
+//       layer:short (0..31)
+//
+// 4)  property update (also used for links)
+//       value_hi:short
+//       value_lo:short
+//       y_coord:short
+//       x_coord:short
+//       property:short (256+prop#)
+//
+// Since we use a circular buffer, we might overwrite the undo storage.
+// To detect this, before playing back commands we scan back and see
+// if we see an end_of_undo_record before hitting the relevant boundary,
+// it's wholly contained.
+//
+// When we read back through, we see them in reverse order, so
+// we'll see the layer number or property number first
+//
+// To be clearer about the circular buffer, there are two cases:
+//     1. a single record is larger than the whole buffer.
+//        this is caught because the end_of_undo_record will
+//        get overwritten.
+//     2. multiple records written are larger than the whole
+//        buffer, so some of them have been overwritten by
+//        the later ones. this is handled by explicitly tracking
+//        the undo length; we never try to parse the data that
+//        got overwritten
+
+// given two points, compute the length between them
+#define stbte__wrap(pos)            ((pos) & (STBTE__UNDO_BUFFER_COUNT-1))
+
+#define STBTE__undo_record  -2
+#define STBTE__redo_record  -3
+#define STBTE__undo_junk    -4  // this is written underneath the undo pointer, never used
+
+static void stbte__write_undo(stbte_tilemap *tm, short value)
+{
+   int pos = tm->undo_pos;
+   tm->undo_buffer[pos] = value;
+   tm->undo_pos = stbte__wrap(pos+1);
+   tm->undo_len += (tm->undo_len < STBTE__UNDO_BUFFER_COUNT-2);
+   tm->redo_len -= (tm->redo_len > 0);
+   tm->undo_available_valid = 0;
+}
+
+static void stbte__write_redo(stbte_tilemap *tm, short value)
+{
+   int pos = tm->undo_pos;
+   tm->undo_buffer[pos] = value;
+   tm->undo_pos = stbte__wrap(pos-1);
+   tm->redo_len += (tm->redo_len < STBTE__UNDO_BUFFER_COUNT-2);
+   tm->undo_len -= (tm->undo_len > 0);
+   tm->undo_available_valid = 0;
+}
+
+static void stbte__begin_undo(stbte_tilemap *tm)
+{
+   tm->redo_len = 0;
+   stbte__write_undo(tm, STBTE__undo_record);
+   stbte__ui.undoing = 1;
+   stbte__ui.alert_msg = 0; // clear alert if they start doing something
+}
+
+static void stbte__end_undo(stbte_tilemap *tm)
+{
+   if (stbte__ui.undoing) {
+      // check if anything got written
+      int pos = stbte__wrap(tm->undo_pos-1);
+      if (tm->undo_buffer[pos] == STBTE__undo_record) {
+         // empty undo record, move back
+         tm->undo_pos = pos;
+         STBTE_ASSERT(tm->undo_len > 0);
+         tm->undo_len -= 1;
+      }
+      tm->undo_buffer[tm->undo_pos] = STBTE__undo_junk;
+      // otherwise do nothing
+
+      stbte__ui.undoing = 0;
+   }
+}
+
+static void stbte__undo_record(stbte_tilemap *tm, int x, int y, int i, int v)
+{
+   STBTE_ASSERT(stbte__ui.undoing);
+   if (stbte__ui.undoing) {
+      stbte__write_undo(tm, v);
+      stbte__write_undo(tm, x);
+      stbte__write_undo(tm, y);
+      stbte__write_undo(tm, i);
+   }
+}
+
+static void stbte__redo_record(stbte_tilemap *tm, int x, int y, int i, int v)
+{
+   stbte__write_redo(tm, v);
+   stbte__write_redo(tm, x);
+   stbte__write_redo(tm, y);
+   stbte__write_redo(tm, i);
+}
+
+static float stbte__extract_float(short s0, short s1)
+{
+   union { float f; short s[2]; } converter;
+   converter.s[0] = s0;
+   converter.s[1] = s1;
+   return converter.f;
+}
+
+static short stbte__extract_short(float f, int slot)
+{
+   union { float f; short s[2]; } converter;
+   converter.f = f;
+   return converter.s[slot];
+}
+
+static void stbte__undo_record_prop(stbte_tilemap *tm, int x, int y, int i, short s0, short s1)
+{
+   STBTE_ASSERT(stbte__ui.undoing);
+   if (stbte__ui.undoing) {
+      stbte__write_undo(tm, s1);
+      stbte__write_undo(tm, s0);
+      stbte__write_undo(tm, x);
+      stbte__write_undo(tm, y);
+      stbte__write_undo(tm, 256+i);
+   }
+}
+
+static void stbte__undo_record_prop_float(stbte_tilemap *tm, int x, int y, int i, float f)
+{
+   stbte__undo_record_prop(tm, x,y,i, stbte__extract_short(f,0), stbte__extract_short(f,1));
+}
+
+static void stbte__redo_record_prop(stbte_tilemap *tm, int x, int y, int i, short s0, short s1)
+{
+   stbte__write_redo(tm, s1);
+   stbte__write_redo(tm, s0);
+   stbte__write_redo(tm, x);
+   stbte__write_redo(tm, y);
+   stbte__write_redo(tm, 256+i);
+}
+
+
+static int stbte__undo_find_end(stbte_tilemap *tm)
+{
+   // first scan through for the end record
+   int i, pos = stbte__wrap(tm->undo_pos-1);
+   for (i=0; i < tm->undo_len;) {
+      STBTE_ASSERT(tm->undo_buffer[pos] != STBTE__undo_junk);
+      if (tm->undo_buffer[pos] == STBTE__undo_record)
+         break;
+      if (tm->undo_buffer[pos] >= 255)
+         pos = stbte__wrap(pos-5), i += 5;
+      else
+         pos = stbte__wrap(pos-4), i += 4;
+   }
+   if (i >= tm->undo_len)
+      return -1;
+   return pos;
+}
+
+static void stbte__undo(stbte_tilemap *tm)
+{
+   int i, pos, endpos;
+   endpos = stbte__undo_find_end(tm);
+   if (endpos < 0)
+      return;
+
+   // we found a complete undo record
+   pos = stbte__wrap(tm->undo_pos-1);
+
+   // start a redo record
+   stbte__write_redo(tm, STBTE__redo_record);
+
+   // so now go back through undo and apply in reverse
+   // order, and copy it to redo
+   for (i=0; endpos != pos; i += 4) {
+      int x,y,n,v;
+      // get the undo entry
+      n = tm->undo_buffer[pos];
+      y = tm->undo_buffer[stbte__wrap(pos-1)];
+      x = tm->undo_buffer[stbte__wrap(pos-2)];
+      v = tm->undo_buffer[stbte__wrap(pos-3)];
+      if (n >= 255) {
+         short s0=0,s1=0;
+         int v2 = tm->undo_buffer[stbte__wrap(pos-4)];
+         pos = stbte__wrap(pos-5);
+         if (n > 255) {
+            float vf = stbte__extract_float(v, v2);
+            s0 = stbte__extract_short(tm->props[y][x][n-256], 0);
+            s1 = stbte__extract_short(tm->props[y][x][n-256], 1);
+            tm->props[y][x][n-256] = vf;
+         } else {
+#ifdef STBTE_ALLOW_LINK
+            s0 = tm->link[y][x].x;
+            s1 = tm->link[y][x].y;
+            stbte__set_link(tm, x,y, v, v2, STBTE__undo_none);
+#endif
+         }
+         // write the redo entry
+         stbte__redo_record_prop(tm, x, y, n-256, s0,s1);
+         // apply the undo entry
+      } else {
+         pos = stbte__wrap(pos-4);
+         // write the redo entry
+         stbte__redo_record(tm, x, y, n, tm->data[y][x][n]);
+         // apply the undo entry
+         tm->data[y][x][n] = (short) v;
+      }
+   }
+   // overwrite undo record with junk
+   tm->undo_buffer[tm->undo_pos] = STBTE__undo_junk;
+}
+
+static int stbte__redo_find_end(stbte_tilemap *tm)
+{
+   // first scan through for the end record
+   int i, pos = stbte__wrap(tm->undo_pos+1);
+   for (i=0; i < tm->redo_len;) {
+      STBTE_ASSERT(tm->undo_buffer[pos] != STBTE__undo_junk);
+      if (tm->undo_buffer[pos] == STBTE__redo_record)
+         break;
+      if (tm->undo_buffer[pos] >= 255)
+         pos = stbte__wrap(pos+5), i += 5;
+      else
+         pos = stbte__wrap(pos+4), i += 4;
+   }
+   if (i >= tm->redo_len)
+      return -1; // this should only ever happen if redo buffer is empty
+   return pos;
+}
+
+static void stbte__redo(stbte_tilemap *tm)
+{
+   // first scan through for the end record
+   int i, pos, endpos;
+   endpos = stbte__redo_find_end(tm);
+   if (endpos < 0)
+      return;
+
+   // we found a complete redo record
+   pos = stbte__wrap(tm->undo_pos+1);
+
+   // start an undo record
+   stbte__write_undo(tm, STBTE__undo_record);
+
+   for (i=0; pos != endpos; i += 4) {
+      int x,y,n,v;
+      n = tm->undo_buffer[pos];
+      y = tm->undo_buffer[stbte__wrap(pos+1)];
+      x = tm->undo_buffer[stbte__wrap(pos+2)];
+      v = tm->undo_buffer[stbte__wrap(pos+3)];
+      if (n >= 255) {
+         int v2 = tm->undo_buffer[stbte__wrap(pos+4)];
+         short s0=0,s1=0;
+         pos = stbte__wrap(pos+5);
+         if (n > 255) {
+            float vf = stbte__extract_float(v, v2);
+            s0 = stbte__extract_short(tm->props[y][x][n-256],0);
+            s1 = stbte__extract_short(tm->props[y][x][n-256],1);
+            tm->props[y][x][n-256] = vf;
+         } else {
+#ifdef STBTE_ALLOW_LINK
+            s0 = tm->link[y][x].x;
+            s1 = tm->link[y][x].y;
+            stbte__set_link(tm, x,y,v,v2, STBTE__undo_none);
+#endif
+         }
+         // don't use stbte__undo_record_prop because it's guarded
+         stbte__write_undo(tm, s1);
+         stbte__write_undo(tm, s0);
+         stbte__write_undo(tm, x);
+         stbte__write_undo(tm, y);
+         stbte__write_undo(tm, n);
+      } else {
+         pos = stbte__wrap(pos+4);
+         // don't use stbte__undo_record because it's guarded
+         stbte__write_undo(tm, tm->data[y][x][n]);
+         stbte__write_undo(tm, x);
+         stbte__write_undo(tm, y);
+         stbte__write_undo(tm, n);
+         tm->data[y][x][n] = (short) v;
+      }
+   }
+   tm->undo_buffer[tm->undo_pos] = STBTE__undo_junk;
+}
+
+// because detecting that undo is available
+static void stbte__recompute_undo_available(stbte_tilemap *tm)
+{
+   tm->undo_available = (stbte__undo_find_end(tm) >= 0);
+   tm->redo_available = (stbte__redo_find_end(tm) >= 0);
+}
+
+static int stbte__undo_available(stbte_tilemap *tm)
+{
+   if (!tm->undo_available_valid)
+      stbte__recompute_undo_available(tm);
+   return tm->undo_available;
+}
+
+static int stbte__redo_available(stbte_tilemap *tm)
+{
+   if (!tm->undo_available_valid)
+      stbte__recompute_undo_available(tm);
+   return tm->redo_available;
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#ifdef STBTE_ALLOW_LINK
+static void stbte__set_link(stbte_tilemap *tm, int src_x, int src_y, int dest_x, int dest_y, int undo_mode)
+{
+   stbte__link *a;
+   STBTE_ASSERT(src_x >= 0 && src_x < STBTE_MAX_TILEMAP_X && src_y >= 0 && src_y < STBTE_MAX_TILEMAP_Y);
+   a = &tm->link[src_y][src_x];
+   // check if it's a do nothing
+   if (a->x == dest_x && a->y == dest_y)
+      return;
+   if (undo_mode != STBTE__undo_none ) {
+      if (undo_mode == STBTE__undo_block) stbte__begin_undo(tm);
+      stbte__undo_record_prop(tm, src_x, src_y, -1, a->x, a->y);
+      if (undo_mode == STBTE__undo_block) stbte__end_undo(tm);
+   }
+   // check if there's an existing link
+   if (a->x >= 0) {
+      // decrement existing link refcount
+      STBTE_ASSERT(tm->linkcount[a->y][a->x] > 0);
+      --tm->linkcount[a->y][a->x];
+   }
+   // increment new dest
+   if (dest_x >= 0) {
+      ++tm->linkcount[dest_y][dest_x];
+   }
+   a->x = dest_x;
+   a->y = dest_y;
+}
+#endif
+
+
+static void stbte__draw_rect(int x0, int y0, int x1, int y1, unsigned int color)
+{
+   STBTE_DRAW_RECT(x0,y0,x1,y1, color);
+}
+
+#ifdef STBTE_ALLOW_LINK
+static void stbte__draw_line(int x0, int y0, int x1, int y1, unsigned int color)
+{
+   int temp;
+   if (x1 < x0) temp=x0,x0=x1,x1=temp;
+   if (y1 < y0) temp=y0,y0=y1,y1=temp;
+   stbte__draw_rect(x0,y0,x1+1,y1+1,color);
+}
+
+static void stbte__draw_link(int x0, int y0, int x1, int y1, unsigned int color)
+{
+   stbte__draw_line(x0,y0,x0,y1, color);
+   stbte__draw_line(x0,y1,x1,y1, color);
+}
+#endif
+
+static void stbte__draw_frame(int x0, int y0, int x1, int y1, unsigned int color)
+{
+   stbte__draw_rect(x0,y0,x1-1,y0+1,color);
+   stbte__draw_rect(x1-1,y0,x1,y1-1,color);
+   stbte__draw_rect(x0+1,y1-1,x1,y1,color);
+   stbte__draw_rect(x0,y0+1,x0+1,y1,color);
+}
+
+static int stbte__get_char_width(int ch)
+{
+   return stbte__fontdata[ch-16];
+}
+
+static short *stbte__get_char_bitmap(int ch)
+{
+   return stbte__fontdata + stbte__font_offset[ch-16];
+}
+
+static void stbte__draw_bitmask_as_columns(int x, int y, short bitmask, int color)
+{
+   int start_i = -1, i=0;
+   while (bitmask) {
+      if (bitmask & (1<<i)) {
+         if (start_i < 0)
+            start_i = i;
+      } else if (start_i >= 0) {
+         stbte__draw_rect(x, y+start_i, x+1, y+i, color);
+         start_i = -1;
+         bitmask &= ~((1<<i)-1); // clear all the old bits; we don't clear them as we go to save code
+      }
+      ++i;
+   }
+}
+
+static void stbte__draw_bitmap(int x, int y, int w, short *bitmap, int color)
+{
+   int i;
+   for (i=0; i < w; ++i)
+      stbte__draw_bitmask_as_columns(x+i, y, *bitmap++, color);
+}
+
+static void stbte__draw_text_core(int x, int y, const char *str, int w, int color, int digitspace)
+{
+   int x_end = x+w;
+   while (*str) {
+      int c = *str++;
+      int cw = stbte__get_char_width(c);
+      if (x + cw > x_end)
+         break;
+      stbte__draw_bitmap(x, y, cw, stbte__get_char_bitmap(c), color);
+      if (digitspace && c == ' ')
+         cw = stbte__get_char_width('0');
+      x += cw+1;
+   }
+}
+
+static void stbte__draw_text(int x, int y, const char *str, int w, int color)
+{
+   stbte__draw_text_core(x,y,str,w,color,0);
+}
+
+static int stbte__text_width(const char *str)
+{
+   int x = 0;
+   while (*str) {
+      int c = *str++;
+      int cw = stbte__get_char_width(c);
+      x += cw+1;
+   }
+   return x;
+}
+
+static void stbte__draw_frame_delayed(int x0, int y0, int x1, int y1, int color)
+{
+   if (stbte__ui.delaycount < STBTE__MAX_DELAYRECT) {
+      stbte__colorrect r = { x0,y0,x1,y1,color };
+      stbte__ui.delayrect[stbte__ui.delaycount++] = r;
+   }
+}
+
+static void stbte__flush_delay(void)
+{
+   stbte__colorrect *r;
+   int i;
+   r = stbte__ui.delayrect;
+   for (i=0; i < stbte__ui.delaycount; ++i,++r)
+      stbte__draw_frame(r->x0,r->y0,r->x1,r->y1,r->color);
+   stbte__ui.delaycount = 0;
+}
+
+static void stbte__activate(int id)
+{
+   stbte__ui.active_id = id;
+   stbte__ui.active_event = stbte__ui.event;
+   stbte__ui.accum_x = 0;
+   stbte__ui.accum_y = 0;
+}
+
+static int stbte__hittest(int x0, int y0, int x1, int y1, int id)
+{
+   int over =    stbte__ui.mx >= x0 && stbte__ui.my >= y0
+              && stbte__ui.mx <  x1 && stbte__ui.my <  y1;
+
+   if (over && stbte__ui.event >= STBTE__tick)
+      stbte__ui.next_hot_id = id;
+
+   return over;
+}
+
+static int stbte__button_core(int id)
+{
+   switch (stbte__ui.event) {
+      case STBTE__leftdown:
+         if (stbte__ui.hot_id == id && STBTE__INACTIVE())
+            stbte__activate(id);
+         break;
+      case STBTE__leftup:
+         if (stbte__ui.active_id == id && STBTE__IS_HOT(id)) {
+            stbte__activate(0);
+            return 1;
+         }
+         break;
+      case STBTE__rightdown:
+         if (stbte__ui.hot_id == id && STBTE__INACTIVE())
+            stbte__activate(id);
+         break;
+      case STBTE__rightup:
+         if (stbte__ui.active_id == id && STBTE__IS_HOT(id)) {
+            stbte__activate(0);
+            return -1;
+         }
+         break;
+   }
+   return 0;
+}
+
+static void stbte__draw_box(int x0, int y0, int x1, int y1, int colormode, int colorindex)
+{
+   stbte__draw_rect (x0,y0,x1,y1, stbte__color_table[colormode][STBTE__base   ][colorindex]);
+   stbte__draw_frame(x0,y0,x1,y1, stbte__color_table[colormode][STBTE__outline][colorindex]);
+}
+
+static void stbte__draw_textbox(int x0, int y0, int x1, int y1, char *text, int xoff, int yoff, int colormode, int colorindex)
+{
+   stbte__draw_box(x0,y0,x1,y1,colormode,colorindex);
+   stbte__draw_text(x0+xoff,y0+yoff, text, x1-x0-xoff-1, stbte__color_table[colormode][STBTE__text][colorindex]);
+}
+
+static int stbte__button(int colormode, const char *label, int x, int y, int textoff, int width, int id, int toggled, int disabled)
+{
+   int x0=x,y0=y, x1=x+width,y1=y+STBTE__BUTTON_HEIGHT;
+   int s = STBTE__BUTTON_INTERNAL_SPACING;
+
+   if(!disabled) stbte__hittest(x0,y0,x1,y1,id);
+
+   if (stbte__ui.event == STBTE__paint)
+      stbte__draw_textbox(x0,y0,x1,y1, (char*) label,s+textoff,s, colormode, STBTE__INDEX_FOR_ID(id,disabled,toggled));
+   if (disabled)
+      return 0;
+   return (stbte__button_core(id) == 1);
+}
+
+static int stbte__button_icon(int colormode, char ch, int x, int y, int width, int id, int toggled, int disabled)
+{
+   int x0=x,y0=y, x1=x+width,y1=y+STBTE__BUTTON_HEIGHT;
+   int s = STBTE__BUTTON_INTERNAL_SPACING;
+
+   stbte__hittest(x0,y0,x1,y1,id);
+
+   if (stbte__ui.event == STBTE__paint) {
+      char label[2] = { ch, 0 };
+      int pad = (9 - stbte__get_char_width(ch))/2;
+      stbte__draw_textbox(x0,y0,x1,y1, label,s+pad,s, colormode, STBTE__INDEX_FOR_ID(id,disabled,toggled));
+   }
+   if (disabled)
+      return 0;
+   return (stbte__button_core(id) == 1);
+}
+
+static int stbte__minibutton(int colormode, int x, int y, int ch, int id)
+{
+   int x0 = x, y0 = y, x1 = x+8, y1 = y+7;
+   stbte__hittest(x0,y0,x1,y1,id);
+   if (stbte__ui.event == STBTE__paint) {
+      char str[2] = { (char)ch, 0 };
+      stbte__draw_textbox(x0,y0,x1,y1, str,1,0,colormode, STBTE__INDEX_FOR_ID(id,0,0));
+   }
+   return stbte__button_core(id);
+}
+
+static int stbte__layerbutton(int x, int y, int ch, int id, int toggled, int disabled, int colormode)
+{
+   int x0 = x, y0 = y, x1 = x+10, y1 = y+11;
+   if(!disabled) stbte__hittest(x0,y0,x1,y1,id);
+   if (stbte__ui.event == STBTE__paint) {
+      char str[2] = { (char)ch, 0 };
+      int off = (9-stbte__get_char_width(ch))/2;
+      stbte__draw_textbox(x0,y0,x1,y1, str, off+1,2, colormode, STBTE__INDEX_FOR_ID(id,disabled,toggled));
+   }
+   if (disabled)
+      return 0;
+   return stbte__button_core(id);
+}
+
+static int stbte__microbutton(int x, int y, int size, int id, int colormode)
+{
+   int x0 = x, y0 = y, x1 = x+size, y1 = y+size;
+   stbte__hittest(x0,y0,x1,y1,id);
+   if (stbte__ui.event == STBTE__paint) {
+      stbte__draw_box(x0,y0,x1,y1, colormode, STBTE__INDEX_FOR_ID(id,0,0));
+   }
+   return stbte__button_core(id);
+}
+
+static int stbte__microbutton_dragger(int x, int y, int size, int id, int *pos)
+{
+   int x0 = x, y0 = y, x1 = x+size, y1 = y+size;
+   stbte__hittest(x0,y0,x1,y1,id);
+   switch (stbte__ui.event) {
+      case STBTE__paint:
+         stbte__draw_box(x0,y0,x1,y1, STBTE__cexpander, STBTE__INDEX_FOR_ID(id,0,0));
+         break;
+      case STBTE__leftdown:
+         if (STBTE__IS_HOT(id) && STBTE__INACTIVE()) {
+            stbte__activate(id);
+            stbte__ui.sx = stbte__ui.mx - *pos;
+         }
+         break;
+      case STBTE__mousemove:
+         if (STBTE__IS_ACTIVE(id) && stbte__ui.active_event == STBTE__leftdown) {
+            *pos = stbte__ui.mx - stbte__ui.sx;
+         }
+         break;
+      case STBTE__leftup:
+         if (STBTE__IS_ACTIVE(id))
+            stbte__activate(0);
+         break;
+      default:
+         return stbte__button_core(id);
+   }
+   return 0;
+}
+
+static int stbte__category_button(const char *label, int x, int y, int width, int id, int toggled)
+{
+   int x0=x,y0=y, x1=x+width,y1=y+STBTE__BUTTON_HEIGHT;
+   int s = STBTE__BUTTON_INTERNAL_SPACING;
+
+   stbte__hittest(x0,y0,x1,y1,id);
+
+   if (stbte__ui.event == STBTE__paint)
+      stbte__draw_textbox(x0,y0,x1,y1, (char*) label, s,s, STBTE__ccategory_button, STBTE__INDEX_FOR_ID(id,0,toggled));
+
+   return (stbte__button_core(id) == 1);
+}
+
+enum
+{
+   STBTE__none,
+   STBTE__begin,
+   STBTE__end,
+   STBTE__change,
+};
+
+// returns -1 if value changes, 1 at end of drag
+static int stbte__slider(int x0, int w, int y, int range, int *value, int id)
+{
+   int x1 = x0+w;
+   int pos = *value * w / (range+1);
+   stbte__hittest(x0,y-2,x1,y+3,id);
+   int event_mouse_move = STBTE__change;
+   switch (stbte__ui.event) {
+      case STBTE__paint:
+         stbte__draw_rect(x0,y,x1,y+1, 0x808080);
+         stbte__draw_rect(x0+pos-1,y-1,x0+pos+2,y+2, 0xffffff);
+         break;
+      case STBTE__leftdown:
+         if (STBTE__IS_HOT(id) && STBTE__INACTIVE()) {
+            stbte__activate(id);
+            event_mouse_move = STBTE__begin;
+         }
+         // fall through
+      case STBTE__mousemove:
+         if (STBTE__IS_ACTIVE(id)) {
+            int v = (stbte__ui.mx-x0)*(range+1)/w;
+            if (v < 0) v = 0; else if (v > range) v = range;
+            *value = v;
+            return event_mouse_move;
+         }
+         break;
+      case STBTE__leftup:
+         if (STBTE__IS_ACTIVE(id)) {
+            stbte__activate(0);
+            return STBTE__end;
+         }
+         break;
+   }
+   return STBTE__none;
+}
+
+#if defined(_WIN32) && defined(__STDC_WANT_SECURE_LIB__)
+   #define stbte__sprintf      sprintf_s
+   #define stbte__sizeof(s)    , sizeof(s)
+#else
+   #define stbte__sprintf      sprintf
+   #define stbte__sizeof(s)
+#endif
+
+static int stbte__float_control(int x0, int y0, int w, float minv, float maxv, float scale, const char *fmt, float *value, int colormode, int id)
+{
+   int x1 = x0+w;
+   int y1 = y0+11;
+   stbte__hittest(x0,y0,x1,y1,id);
+   switch (stbte__ui.event) {
+      case STBTE__paint: {
+         char text[32];
+         stbte__sprintf(text stbte__sizeof(text), fmt ? fmt : "%6.2f", *value);
+         stbte__draw_textbox(x0,y0,x1,y1, text, 1,2, colormode, STBTE__INDEX_FOR_ID(id,0,0));
+         break;
+      }
+      case STBTE__leftdown:
+      case STBTE__rightdown:
+         if (STBTE__IS_HOT(id) && STBTE__INACTIVE())
+            stbte__activate(id);
+         return STBTE__begin;
+         break;
+      case STBTE__leftup:
+      case STBTE__rightup:
+         if (STBTE__IS_ACTIVE(id)) {
+            stbte__activate(0);
+            return STBTE__end;
+         }
+         break;
+      case STBTE__mousemove:
+         if (STBTE__IS_ACTIVE(id)) {
+            float v = *value, delta;
+            int ax = stbte__ui.accum_x/STBTE_FLOAT_CONTROL_GRANULARITY;
+            int ay = stbte__ui.accum_y/STBTE_FLOAT_CONTROL_GRANULARITY;
+            stbte__ui.accum_x -= ax*STBTE_FLOAT_CONTROL_GRANULARITY;
+            stbte__ui.accum_y -= ay*STBTE_FLOAT_CONTROL_GRANULARITY;
+            if (stbte__ui.shift) {
+               if (stbte__ui.active_event == STBTE__leftdown)
+                  delta = ax * 16.0f + ay;
+               else
+                  delta = ax / 16.0f + ay / 256.0f;
+            } else {
+               if (stbte__ui.active_event == STBTE__leftdown)
+                  delta = ax*10.0f + ay;
+               else
+                  delta = ax * 0.1f + ay * 0.01f;
+            }
+            v += delta * scale;
+            if (v < minv) v = minv;
+            if (v > maxv) v = maxv;
+            *value = v;
+            return STBTE__change;
+         }
+         break;
+   }
+   return STBTE__none;
+}
+
+static void stbte__scrollbar(int x, int y0, int y1, int *val, int v0, int v1, int num_vis, int id)
+{
+   int thumbpos;
+   if (v1 - v0 <= num_vis)
+      return;
+
+   // generate thumbpos from numvis
+   thumbpos = y0+2 + (y1-y0-4) * *val / (v1 - v0 - num_vis);
+   if (thumbpos < y0) thumbpos = y0;
+   if (thumbpos >= y1) thumbpos = y1;
+   stbte__hittest(x-1,y0,x+2,y1,id);
+   switch (stbte__ui.event) {
+      case STBTE__paint:
+         stbte__draw_rect(x,y0,x+1,y1, stbte__color_table[STBTE__cscrollbar][STBTE__text][STBTE__idle]);
+         stbte__draw_box(x-1,thumbpos-3,x+2,thumbpos+4, STBTE__cscrollbar, STBTE__INDEX_FOR_ID(id,0,0));
+         break;
+      case STBTE__leftdown:
+         if (STBTE__IS_HOT(id) && STBTE__INACTIVE()) {
+            // check if it's over the thumb
+            stbte__activate(id);
+            *val = ((stbte__ui.my-y0) * (v1 - v0 - num_vis) + (y1-y0)/2)/ (y1-y0);
+         }
+         break;
+      case STBTE__mousemove:
+         if (STBTE__IS_ACTIVE(id) && stbte__ui.mx >= x-15 && stbte__ui.mx <= x+15)
+            *val = ((stbte__ui.my-y0) * (v1 - v0 - num_vis) + (y1-y0)/2)/ (y1-y0);
+         break;
+      case STBTE__leftup:
+         if (STBTE__IS_ACTIVE(id))
+            stbte__activate(0);
+         break;
+
+   }
+
+   if (*val >= v1-num_vis)
+      *val = v1-num_vis;
+   if (*val <= v0)
+      *val = v0;
+}
+
+
+static void stbte__compute_digits(stbte_tilemap *tm)
+{
+   if (tm->max_x >= 1000 || tm->max_y >= 1000)
+      tm->digits = 4;
+   else if (tm->max_x >= 100 || tm->max_y >= 100)
+      tm->digits = 3;
+   else
+      tm->digits = 2;
+}
+
+static int stbte__is_single_selection(void)
+{
+   return stbte__ui.has_selection
+       && stbte__ui.select_x0 == stbte__ui.select_x1
+       && stbte__ui.select_y0 == stbte__ui.select_y1;
+}
+
+typedef struct
+{
+   int width, height;
+   int x,y;
+   int active;
+   float retracted;
+} stbte__region_t;
+
+static stbte__region_t stbte__region[4];
+
+#define STBTE__TOOLBAR_ICON_SIZE   (9+2*2)
+#define STBTE__TOOLBAR_PASTE_SIZE  (34+2*2)
+
+// This routine computes where every panel goes onscreen: computes
+// a minimum width for each side based on which panels are on that
+// side, and accounts for width-dependent layout of certain panels.
+static void stbte__compute_panel_locations(stbte_tilemap *tm)
+{
+   int i, limit, w, k;
+   int window_width  = stbte__ui.x1 - stbte__ui.x0;
+   int window_height = stbte__ui.y1 - stbte__ui.y0;
+   int min_width[STBTE__num_panel]={0,0,0,0,0,0,0};
+   int height[STBTE__num_panel]={0,0,0,0,0,0,0};
+   int panel_active[STBTE__num_panel]={1,0,1,1,1,1,1};
+   int vpos[4] = { 0,0,0,0 };
+   stbte__panel *p = stbte__ui.panel;
+   stbte__panel *pt = &p[STBTE__panel_toolbar];
+#ifdef STBTE__NO_PROPS
+   int props = 0;
+#else
+   int props = 1;
+#endif
+
+   for (i=0; i < 4; ++i) {
+      stbte__region[i].active = 0;
+      stbte__region[i].width = 0;
+      stbte__region[i].height = 0;
+   }
+
+   // compute number of digits needs for info panel
+   stbte__compute_digits(tm);
+
+   // determine which panels are active
+   panel_active[STBTE__panel_categories] = tm->num_categories != 0;
+   panel_active[STBTE__panel_layers    ] = tm->num_layers     >  1;
+#ifdef STBTE__COLORPICKER
+   panel_active[STBTE__panel_colorpick ] = 1;
+#endif
+
+   panel_active[STBTE__panel_props     ] = props && stbte__is_single_selection();
+
+   // compute minimum widths for each panel (assuming they're on sides not top)
+   min_width[STBTE__panel_info      ] = 8 + 11 + 7*tm->digits+17+7;               // estimate min width of "w:0000"
+   min_width[STBTE__panel_colorpick ] = 120;
+   min_width[STBTE__panel_tiles     ] = 4 + tm->palette_spacing_x + 5;            // 5 for scrollbar
+   min_width[STBTE__panel_categories] = 4 + 42 + 5;                               // 42 is enough to show ~7 chars; 5 for scrollbar
+   min_width[STBTE__panel_layers    ] = 4 + 54 + 30*tm->has_layer_names;          // 2 digits plus 3 buttons plus scrollbar
+   min_width[STBTE__panel_toolbar   ] = 4 + STBTE__TOOLBAR_PASTE_SIZE;            // wide enough for 'Paste' button
+   min_width[STBTE__panel_props     ] = 80;                    // narrowest info panel
+
+   // compute minimum widths for left & right panels based on the above
+   stbte__region[0].width = stbte__ui.left_width;
+   stbte__region[1].width = stbte__ui.right_width;
+
+   for (i=0; i < STBTE__num_panel; ++i) {
+      if (panel_active[i]) {
+         int side = stbte__ui.panel[i].side;
+         if (min_width[i] > stbte__region[side].width)
+            stbte__region[side].width = min_width[i];
+         stbte__region[side].active = 1;
+      }
+   }
+
+   // now compute the heights of each panel
+
+   // if toolbar at top, compute its size & push the left and right start points down
+   if (stbte__region[STBTE__side_top].active) {
+      int height = STBTE__TOOLBAR_ICON_SIZE+2;
+      pt->x0     = stbte__ui.x0;
+      pt->y0     = stbte__ui.y0;
+      pt->width  = window_width;
+      pt->height = height;
+      vpos[STBTE__side_left] = vpos[STBTE__side_right] = height;
+   } else {
+      int num_rows = STBTE__num_tool * ((stbte__region[pt->side].width-4)/STBTE__TOOLBAR_ICON_SIZE);
+      height[STBTE__panel_toolbar] = num_rows*13 + 3*15 + 4; // 3*15 for cut/copy/paste, which are stacked vertically
+   }
+
+   for (i=0; i < 4; ++i)
+      stbte__region[i].y = stbte__ui.y0 + vpos[i];
+
+   for (i=0; i < 2; ++i) {
+      int anim = (int) (stbte__region[i].width * stbte__region[i].retracted);
+      stbte__region[i].x = (i == STBTE__side_left) ? stbte__ui.x0 - anim : stbte__ui.x1 - stbte__region[i].width + anim;
+   }
+
+   // color picker
+   height[STBTE__panel_colorpick] = 300;
+
+   // info panel
+   w = stbte__region[p[STBTE__panel_info].side].width;
+   p[STBTE__panel_info].mode = (w >= 8 + (11+7*tm->digits+17)*2 + 4);
+   if (p[STBTE__panel_info].mode)
+      height[STBTE__panel_info] = 5 + 11*2 + 2 + tm->palette_spacing_y;
+   else
+      height[STBTE__panel_info] = 5 + 11*4 + 2 + tm->palette_spacing_y;
+
+   // layers
+   limit = 6 + stbte__ui.panel[STBTE__panel_layers].delta_height;
+   height[STBTE__panel_layers] = (tm->num_layers > limit ? limit : tm->num_layers)*15 + 7 + (tm->has_layer_names ? 0 : 11) + props*13;
+
+   // categories
+   limit = 6 + stbte__ui.panel[STBTE__panel_categories].delta_height;
+   height[STBTE__panel_categories] = (tm->num_categories+1 > limit ? limit : tm->num_categories+1)*11 + 14;
+   if (stbte__ui.panel[STBTE__panel_categories].side == stbte__ui.panel[STBTE__panel_categories].side)
+      height[STBTE__panel_categories] -= 4;
+
+   // palette
+   k =  (stbte__region[p[STBTE__panel_tiles].side].width - 8) / tm->palette_spacing_x;
+   if (k == 0) k = 1;
+   height[STBTE__panel_tiles] = ((tm->num_tiles+k-1)/k) * tm->palette_spacing_y + 8;
+
+   // properties panel
+   height[STBTE__panel_props] = 9 + STBTE_MAX_PROPERTIES*14;
+
+   // now compute the locations of all the panels
+   for (i=0; i < STBTE__num_panel; ++i) {
+      if (panel_active[i]) {
+         int side = p[i].side;
+         if (side == STBTE__side_left || side == STBTE__side_right) {
+            p[i].width  = stbte__region[side].width;
+            p[i].x0     = stbte__region[side].x;
+            p[i].y0     = stbte__ui.y0 + vpos[side];
+            p[i].height = height[i];
+            vpos[side] += height[i];
+            if (vpos[side] > window_height) {
+               vpos[side] = window_height;
+               p[i].height = stbte__ui.y1 - p[i].y0;
+            }
+         } else {
+            ; // it's at top, it's already been explicitly set up earlier
+         }
+      } else {
+         // inactive panel
+         p[i].height = 0;
+         p[i].width  = 0;
+         p[i].x0     = stbte__ui.x1;
+         p[i].y0     = stbte__ui.y1;
+      }
+   }
+}
+
+// unique identifiers for imgui
+enum
+{
+   STBTE__map=1,
+   STBTE__region,
+   STBTE__panel,                          // panel background to hide map, and misc controls
+   STBTE__info,                           // info data
+   STBTE__toolbarA, STBTE__toolbarB,      // toolbar buttons: param is tool number
+   STBTE__palette,                        // palette selectors: param is tile index
+   STBTE__categories,                     // category selectors: param is category index
+   STBTE__layer,                          //
+   STBTE__solo, STBTE__hide, STBTE__lock, // layer controls: param is layer
+   STBTE__scrollbar,                      // param is panel ID
+   STBTE__panel_mover,                    // p1 is panel ID, p2 is destination side
+   STBTE__panel_sizer,                    // param panel ID
+   STBTE__scrollbar_id,
+   STBTE__colorpick_id,
+   STBTE__prop_flag,
+   STBTE__prop_float,
+   STBTE__prop_int,
+};
+
+// id is:      [      24-bit data     : 7-bit identifier ]
+// map id is:  [  12-bit y : 12 bit x : 7-bit identifier ]
+
+#define STBTE__ID(n,p)     ((n) + ((p)<<7))
+#define STBTE__ID2(n,p,q)  STBTE__ID(n, ((p)<<12)+(q) )
+#define STBTE__IDMAP(x,y)  STBTE__ID2(STBTE__map, x,y)
+
+static void stbte__activate_map(int x, int y)
+{
+   stbte__ui.active_id = STBTE__IDMAP(x,y);
+   stbte__ui.active_event = stbte__ui.event;
+   stbte__ui.sx = x;
+   stbte__ui.sy = y;
+}
+
+static void stbte__alert(const char *msg)
+{
+   stbte__ui.alert_msg = msg;
+   stbte__ui.alert_timer = 3;
+}
+
+#define STBTE__BG(tm,layer) ((layer) == 0 ? (tm)->background_tile : STBTE__NO_TILE)
+
+
+
+static void stbte__brush_predict(stbte_tilemap *tm, short result[])
+{
+   stbte__tileinfo *ti;
+   int i;
+
+   if (tm->cur_tile < 0) return;
+
+   ti = &tm->tiles[tm->cur_tile];
+
+   // find lowest legit layer to paint it on, and put it there
+   for (i=0; i < tm->num_layers; ++i) {
+      // check if object is allowed on layer
+      if (!(ti->layermask & (1 << i)))
+         continue;
+
+      if (i != tm->solo_layer) {
+         // if there's a selected layer, can only paint on that
+         if (tm->cur_layer >= 0 && i != tm->cur_layer)
+            continue;
+
+         // if the layer is hidden, we can't see it
+         if (tm->layerinfo[i].hidden)
+            continue;
+
+         // if the layer is locked, we can't write to it
+         if (tm->layerinfo[i].locked == STBTE__locked)
+            continue;
+
+         // if the layer is non-empty and protected, can't write to it
+         if (tm->layerinfo[i].locked == STBTE__protected && result[i] != STBTE__BG(tm,i))
+            continue;
+      }
+
+      result[i] = ti->id;
+      return;
+   }
+}
+
+static void stbte__brush(stbte_tilemap *tm, int x, int y)
+{
+   stbte__tileinfo *ti;
+
+   // find lowest legit layer to paint it on, and put it there
+   int i;
+
+   if (tm->cur_tile < 0) return;
+
+   ti = &tm->tiles[tm->cur_tile];
+
+   for (i=0; i < tm->num_layers; ++i) {
+      // check if object is allowed on layer
+      if (!(ti->layermask & (1 << i)))
+         continue;
+
+      if (i != tm->solo_layer) {
+         // if there's a selected layer, can only paint on that
+         if (tm->cur_layer >= 0 && i != tm->cur_layer)
+            continue;
+
+         // if the layer is hidden, we can't see it
+         if (tm->layerinfo[i].hidden)
+            continue;
+
+         // if the layer is locked, we can't write to it
+         if (tm->layerinfo[i].locked == STBTE__locked)
+            continue;
+
+         // if the layer is non-empty and protected, can't write to it
+         if (tm->layerinfo[i].locked == STBTE__protected && tm->data[y][x][i] != STBTE__BG(tm,i))
+            continue;
+      }
+
+      stbte__undo_record(tm,x,y,i,tm->data[y][x][i]);
+      tm->data[y][x][i] = ti->id;
+      return;
+   }
+
+   //stbte__alert("Selected tile not valid on active layer(s)");
+}
+
+enum
+{
+   STBTE__erase_none = -1,
+   STBTE__erase_brushonly = 0,
+   STBTE__erase_any = 1,
+   STBTE__erase_all = 2,
+};
+
+static int stbte__erase_predict(stbte_tilemap *tm, short result[], int allow_any)
+{
+   stbte__tileinfo *ti = tm->cur_tile >= 0 ? &tm->tiles[tm->cur_tile] : NULL;
+   int i;
+
+   if (allow_any == STBTE__erase_none)
+      return allow_any;
+
+   // first check if only one layer is legit
+   i = tm->cur_layer;
+   if (tm->solo_layer >= 0)
+      i = tm->solo_layer;
+
+   // if only one layer is legit, directly process that one for clarity
+   if (i >= 0) {
+      short bg = (i == 0 ? tm->background_tile : -1);
+      if (tm->solo_layer < 0) {
+         // check that we're allowed to write to it
+         if (tm->layerinfo[i].hidden) return STBTE__erase_none;
+         if (tm->layerinfo[i].locked) return STBTE__erase_none;
+      }
+      if (result[i] == bg)
+         return STBTE__erase_none; // didn't erase anything
+      if (ti && result[i] == ti->id && (i != 0 || ti->id != tm->background_tile)) {
+         result[i] = bg;
+         return STBTE__erase_brushonly;
+      }
+      if (allow_any == STBTE__erase_any) {
+         result[i] = bg;
+         return STBTE__erase_any;
+      }
+      return STBTE__erase_none;
+   }
+
+   // if multiple layers are legit, first scan all for brush data
+
+   if (ti && allow_any != STBTE__erase_all) {
+      for (i=tm->num_layers-1; i >= 0; --i) {
+         if (result[i] != ti->id)
+            continue;
+         if (tm->layerinfo[i].locked || tm->layerinfo[i].hidden)
+            continue;
+         if (i == 0 && result[i] == tm->background_tile)
+            return STBTE__erase_none;
+         result[i] = STBTE__BG(tm,i);
+         return STBTE__erase_brushonly;
+      }
+   }
+
+   if (allow_any != STBTE__erase_any && allow_any != STBTE__erase_all)
+      return STBTE__erase_none;
+
+   // apply layer filters, erase from top
+   for (i=tm->num_layers-1; i >= 0; --i) {
+      if (result[i] < 0)
+         continue;
+      if (tm->layerinfo[i].locked || tm->layerinfo[i].hidden)
+         continue;
+      if (i == 0 && result[i] == tm->background_tile)
+         return STBTE__erase_none;
+      result[i] = STBTE__BG(tm,i);
+      if (allow_any != STBTE__erase_all)
+         return STBTE__erase_any;
+   }
+
+   if (allow_any == STBTE__erase_all)
+      return allow_any;
+   return STBTE__erase_none;
+}
+
+static int stbte__erase(stbte_tilemap *tm, int x, int y, int allow_any)
+{
+   stbte__tileinfo *ti = tm->cur_tile >= 0 ? &tm->tiles[tm->cur_tile] : NULL;
+   int i;
+
+   if (allow_any == STBTE__erase_none)
+      return allow_any;
+
+   // first check if only one layer is legit
+   i = tm->cur_layer;
+   if (tm->solo_layer >= 0)
+      i = tm->solo_layer;
+
+   // if only one layer is legit, directly process that one for clarity
+   if (i >= 0) {
+      short bg = (i == 0 ? tm->background_tile : -1);
+      if (tm->solo_layer < 0) {
+         // check that we're allowed to write to it
+         if (tm->layerinfo[i].hidden) return STBTE__erase_none;
+         if (tm->layerinfo[i].locked) return STBTE__erase_none;
+      }
+      if (tm->data[y][x][i] == bg)
+         return -1; // didn't erase anything
+      if (ti && tm->data[y][x][i] == ti->id && (i != 0 || ti->id != tm->background_tile)) {
+         stbte__undo_record(tm,x,y,i,tm->data[y][x][i]);
+         tm->data[y][x][i] = bg;
+         return STBTE__erase_brushonly;
+      }
+      if (allow_any == STBTE__erase_any) {
+         stbte__undo_record(tm,x,y,i,tm->data[y][x][i]);
+         tm->data[y][x][i] = bg;
+         return STBTE__erase_any;
+      }
+      return STBTE__erase_none;
+   }
+
+   // if multiple layers are legit, first scan all for brush data
+
+   if (ti && allow_any != STBTE__erase_all) {
+      for (i=tm->num_layers-1; i >= 0; --i) {
+         if (tm->data[y][x][i] != ti->id)
+            continue;
+         if (tm->layerinfo[i].locked || tm->layerinfo[i].hidden)
+            continue;
+         if (i == 0 && tm->data[y][x][i] == tm->background_tile)
+            return STBTE__erase_none;
+         stbte__undo_record(tm,x,y,i,tm->data[y][x][i]);
+         tm->data[y][x][i] = STBTE__BG(tm,i);
+         return STBTE__erase_brushonly;
+      }
+   }
+
+   if (allow_any != STBTE__erase_any && allow_any != STBTE__erase_all)
+      return STBTE__erase_none;
+
+   // apply layer filters, erase from top
+   for (i=tm->num_layers-1; i >= 0; --i) {
+      if (tm->data[y][x][i] < 0)
+         continue;
+      if (tm->layerinfo[i].locked || tm->layerinfo[i].hidden)
+         continue;
+      if (i == 0 && tm->data[y][x][i] == tm->background_tile)
+         return STBTE__erase_none;
+      stbte__undo_record(tm,x,y,i,tm->data[y][x][i]);
+      tm->data[y][x][i] = STBTE__BG(tm,i);
+      if (allow_any != STBTE__erase_all)
+         return STBTE__erase_any;
+   }
+   if (allow_any == STBTE__erase_all)
+      return allow_any;
+   return STBTE__erase_none;
+}
+
+static int stbte__find_tile(stbte_tilemap *tm, int tile_id)
+{
+   int i;
+   for (i=0; i < tm->num_tiles; ++i)
+      if (tm->tiles[i].id == tile_id)
+         return i;
+   stbte__alert("Eyedropped tile that isn't in tileset");
+   return -1;
+}
+
+static void stbte__eyedrop(stbte_tilemap *tm, int x, int y)
+{
+   int i,j;
+
+   // flush eyedropper state
+   if (stbte__ui.eyedrop_x != x || stbte__ui.eyedrop_y != y) {
+      stbte__ui.eyedrop_x = x;
+      stbte__ui.eyedrop_y = y;
+      stbte__ui.eyedrop_last_layer = tm->num_layers;
+   }
+
+   // if only one layer is active, query that
+   i = tm->cur_layer;
+   if (tm->solo_layer >= 0)
+      i = tm->solo_layer;
+   if (i >= 0) {
+      if (tm->data[y][x][i] == STBTE__NO_TILE)
+         return;
+      tm->cur_tile = stbte__find_tile(tm, tm->data[y][x][i]);
+      return;
+   }
+
+   // if multiple layers, continue from previous
+   i = stbte__ui.eyedrop_last_layer;
+   for (j=0; j < tm->num_layers; ++j) {
+      if (--i < 0)
+         i = tm->num_layers-1;
+      if (tm->layerinfo[i].hidden)
+         continue;
+      if (tm->data[y][x][i] == STBTE__NO_TILE)
+         continue;
+      stbte__ui.eyedrop_last_layer = i;
+      tm->cur_tile = stbte__find_tile(tm, tm->data[y][x][i]);
+      return;
+   }
+}
+
+static int stbte__should_copy_properties(stbte_tilemap *tm)
+{
+   int i;
+   if (tm->propmode == STBTE__propmode_always)
+      return 1;
+   if (tm->propmode == STBTE__propmode_never)
+      return 0;
+   if (tm->solo_layer >= 0 || tm->cur_layer >= 0)
+      return 0;
+   for (i=0; i < tm->num_layers; ++i)
+      if (tm->layerinfo[i].hidden || tm->layerinfo[i].locked)
+         return 0;
+   return 1;
+}
+
+// compute the result of pasting into a tile non-destructively so we can preview it
+static void stbte__paste_stack(stbte_tilemap *tm, short result[], short dest[], short src[], int dragging)
+{
+   int i;
+
+   // special case single-layer
+   i = tm->cur_layer;
+   if (tm->solo_layer >= 0)
+      i = tm->solo_layer;
+   if (i >= 0) {
+      if (tm->solo_layer < 0) {
+         // check that we're allowed to write to it
+         if (tm->layerinfo[i].hidden) return;
+         if (tm->layerinfo[i].locked == STBTE__locked) return;
+         // if protected, dest has to be empty
+         if (tm->layerinfo[i].locked == STBTE__protected && dest[i] != STBTE__BG(tm,i)) return;
+         // if dragging w/o copy, we will try to erase stuff, which protection disallows
+         if (dragging && tm->layerinfo[i].locked == STBTE__protected)
+             return;
+      }
+      result[i] = dest[i];
+      if (src[i] != STBTE__BG(tm,i))
+         result[i] = src[i];
+      return;
+   }
+
+   for (i=0; i < tm->num_layers; ++i) {
+      result[i] = dest[i];
+      if (src[i] != STBTE__NO_TILE)
+         if (!tm->layerinfo[i].hidden && tm->layerinfo[i].locked != STBTE__locked)
+            if (tm->layerinfo[i].locked == STBTE__unlocked || (!dragging && dest[i] == STBTE__BG(tm,i)))
+               result[i] = src[i];
+   }
+}
+
+// compute the result of dragging away from a tile
+static void stbte__clear_stack(stbte_tilemap *tm, short result[])
+{
+   int i;
+   // special case single-layer
+   i = tm->cur_layer;
+   if (tm->solo_layer >= 0)
+      i = tm->solo_layer;
+   if (i >= 0)
+      result[i] = STBTE__BG(tm,i);
+   else
+      for (i=0; i < tm->num_layers; ++i)
+         if (!tm->layerinfo[i].hidden && tm->layerinfo[i].locked == STBTE__unlocked)
+            result[i] = STBTE__BG(tm,i);
+}
+
+// check if some map square is active
+#define STBTE__IS_MAP_ACTIVE()  ((stbte__ui.active_id & 127) == STBTE__map)
+#define STBTE__IS_MAP_HOT()     ((stbte__ui.hot_id & 127) == STBTE__map)
+
+static void stbte__fillrect(stbte_tilemap *tm, int x0, int y0, int x1, int y1, int fill)
+{
+   int i,j;
+
+   stbte__begin_undo(tm);
+   if (x0 > x1) i=x0,x0=x1,x1=i;
+   if (y0 > y1) j=y0,y0=y1,y1=j;
+   for (j=y0; j <= y1; ++j)
+      for (i=x0; i <= x1; ++i)
+         if (fill)
+            stbte__brush(tm, i,j);
+         else
+            stbte__erase(tm, i,j,STBTE__erase_any);
+   stbte__end_undo(tm);
+   // suppress warning from brush
+   stbte__ui.alert_msg = 0;
+}
+
+static void stbte__select_rect(stbte_tilemap *tm, int x0, int y0, int x1, int y1)
+{
+   stbte__ui.has_selection = 1;
+   stbte__ui.select_x0 = (x0 < x1 ? x0 : x1);
+   stbte__ui.select_x1 = (x0 < x1 ? x1 : x0);
+   stbte__ui.select_y0 = (y0 < y1 ? y0 : y1);
+   stbte__ui.select_y1 = (y0 < y1 ? y1 : y0);
+}
+
+static void stbte__copy_properties(float *dest, float *src)
+{
+   int i;
+   for (i=0; i < STBTE_MAX_PROPERTIES; ++i)
+      dest[i] = src[i];
+}
+
+static void stbte__copy_cut(stbte_tilemap *tm, int cut)
+{
+   int i,j,n,w,h,p=0;
+   int copy_props = stbte__should_copy_properties(tm);
+   if (!stbte__ui.has_selection)
+      return;
+   w = stbte__ui.select_x1 - stbte__ui.select_x0 + 1;
+   h = stbte__ui.select_y1 - stbte__ui.select_y0 + 1;
+   if (STBTE_MAX_COPY / w < h) {
+      stbte__alert("Selection too large for copy buffer, increase STBTE_MAX_COPY");
+      return;
+   }
+
+   for (i=0; i < w*h; ++i)
+      for (n=0; n < tm->num_layers; ++n)
+         stbte__ui.copybuffer[i][n] = STBTE__NO_TILE;
+
+   if (cut)
+      stbte__begin_undo(tm);
+   for (j=stbte__ui.select_y0; j <= stbte__ui.select_y1; ++j) {
+      for (i=stbte__ui.select_x0; i <= stbte__ui.select_x1; ++i) {
+         for (n=0; n < tm->num_layers; ++n) {
+            if (tm->solo_layer >= 0) {
+               if (tm->solo_layer != n)
+                  continue;
+            } else {
+               if (tm->cur_layer >= 0)
+                  if (tm->cur_layer != n)
+                     continue;
+               if (tm->layerinfo[n].hidden)
+                  continue;
+               if (cut && tm->layerinfo[n].locked)
+                  continue;
+            }
+            stbte__ui.copybuffer[p][n] = tm->data[j][i][n];
+            if (cut) {
+               stbte__undo_record(tm,i,j,n, tm->data[j][i][n]);
+               tm->data[j][i][n] = (n==0 ? tm->background_tile : -1);
+            }
+         }
+         if (copy_props) {
+            stbte__copy_properties(stbte__ui.copyprops[p], tm->props[j][i]);
+#ifdef STBTE_ALLOW_LINK
+            stbte__ui.copylinks[p] = tm->link[j][i];
+            if (cut)
+               stbte__set_link(tm, i,j,-1,-1, STBTE__undo_record);
+#endif
+         }
+         ++p;
+      }
+   }
+   if (cut)
+      stbte__end_undo(tm);
+   stbte__ui.copy_width = w;
+   stbte__ui.copy_height = h;
+   stbte__ui.has_copy = 1;
+   //stbte__ui.has_selection = 0;
+   stbte__ui.copy_has_props = copy_props;
+   stbte__ui.copy_src = tm; // used to give better semantics when copying links
+   stbte__ui.copy_src_x = stbte__ui.select_x0;
+   stbte__ui.copy_src_y = stbte__ui.select_y0;
+}
+
+static int stbte__in_rect(int x, int y, int x0, int y0, int w, int h)
+{
+   return x >= x0 && x < x0+w && y >= y0 && y < y0+h;
+}
+
+#ifdef STBTE_ALLOW_LINK
+static int stbte__in_src_rect(int x, int y)
+{
+   return stbte__in_rect(x,y, stbte__ui.copy_src_x, stbte__ui.copy_src_y, stbte__ui.copy_width, stbte__ui.copy_height);
+}
+
+static int stbte__in_dest_rect(int x, int y, int destx, int desty)
+{
+   return stbte__in_rect(x,y, destx, desty, stbte__ui.copy_width, stbte__ui.copy_height);
+}
+#endif
+
+static void stbte__paste(stbte_tilemap *tm, int mapx, int mapy)
+{
+   int w = stbte__ui.copy_width;
+   int h = stbte__ui.copy_height;
+   int i,j,k,p;
+   int x = mapx - (w>>1);
+   int y = mapy - (h>>1);
+   int copy_props = stbte__should_copy_properties(tm) && stbte__ui.copy_has_props;
+   if (stbte__ui.has_copy == 0)
+      return;
+   stbte__begin_undo(tm);
+   p = 0;
+   for (j=0; j < h; ++j) {
+      for (i=0; i < w; ++i) {
+         if (y+j >= 0 && y+j < tm->max_y && x+i >= 0 && x+i < tm->max_x) {
+            // compute the new stack
+            short tilestack[STBTE_MAX_LAYERS];
+            for (k=0; k < tm->num_layers; ++k)
+               tilestack[k] = tm->data[y+j][x+i][k];
+            stbte__paste_stack(tm, tilestack, tilestack, stbte__ui.copybuffer[p], 0);
+            // update anything that changed
+            for (k=0; k < tm->num_layers; ++k) {
+               if (tilestack[k] != tm->data[y+j][x+i][k]) {
+                  stbte__undo_record(tm, x+i,y+j,k, tm->data[y+j][x+i][k]);
+                  tm->data[y+j][x+i][k] = tilestack[k];
+               }
+            }
+         }
+         if (copy_props) {
+#ifdef STBTE_ALLOW_LINK
+            // need to decide how to paste a link, so there's a few cases
+            int destx = -1, desty = -1;
+            stbte__link *link = &stbte__ui.copylinks[p];
+
+            // check if link is within-rect
+            if (stbte__in_src_rect(link->x, link->y)) {
+               // new link should point to copy (but only if copy is within map)
+               destx = x + (link->x - stbte__ui.copy_src_x);
+               desty = y + (link->y - stbte__ui.copy_src_y);
+            } else if (tm == stbte__ui.copy_src) {
+               // if same map, then preserve link unless target is overwritten
+               if (!stbte__in_dest_rect(link->x,link->y,x,y)) {
+                  destx = link->x;
+                  desty = link->y;
+               }
+            }
+            // this is necessary for offset-copy, but also in case max_x/max_y has changed
+            if (destx < 0 || destx >= tm->max_x || desty < 0 || desty >= tm->max_y)
+               destx = -1, desty = -1;
+            stbte__set_link(tm, x+i, y+j, destx, desty, STBTE__undo_record);
+#endif
+            for (k=0; k < STBTE_MAX_PROPERTIES; ++k) {
+               if (tm->props[y+j][x+i][k] != stbte__ui.copyprops[p][k])
+                  stbte__undo_record_prop_float(tm, x+i, y+j, k, tm->props[y+j][x+i][k]);
+            }
+            stbte__copy_properties(tm->props[y+j][x+i], stbte__ui.copyprops[p]);
+         }
+         ++p;
+      }
+   }
+   stbte__end_undo(tm);
+}
+
+static void stbte__drag_update(stbte_tilemap *tm, int mapx, int mapy, int copy_props)
+{
+   int w = stbte__ui.drag_w, h = stbte__ui.drag_h;
+   int ox,oy,i,deleted=0,written=0;
+   short temp[STBTE_MAX_LAYERS];
+   short *data = NULL;
+
+   STBTE__NOTUSED(deleted);
+   STBTE__NOTUSED(written);
+
+   if (!stbte__ui.shift) {
+      ox = mapx - stbte__ui.drag_x;
+      oy = mapy - stbte__ui.drag_y;
+      if (ox >= 0 && ox < w && oy >= 0 && oy < h) {
+         deleted=1;
+         for (i=0; i < tm->num_layers; ++i)
+            temp[i] = tm->data[mapy][mapx][i];
+         data = temp;
+         stbte__clear_stack(tm, data);
+      }
+   }
+   ox = mapx - stbte__ui.drag_dest_x;
+   oy = mapy - stbte__ui.drag_dest_y;
+   // if this map square is in the target drag region
+   if (ox >= 0 && ox < w && oy >= 0 && oy < h) {
+      // and the src map square is on the map
+      if (stbte__in_rect(stbte__ui.drag_x+ox, stbte__ui.drag_y+oy, 0, 0, tm->max_x, tm->max_y)) {
+         written = 1;
+         if (data == NULL) {
+            for (i=0; i < tm->num_layers; ++i)
+               temp[i] = tm->data[mapy][mapx][i];
+            data = temp;
+         }
+         stbte__paste_stack(tm, data, data, tm->data[stbte__ui.drag_y+oy][stbte__ui.drag_x+ox], !stbte__ui.shift);
+         if (copy_props) {
+            for (i=0; i < STBTE_MAX_PROPERTIES; ++i) {
+               if (tm->props[mapy][mapx][i] != tm->props[stbte__ui.drag_y+oy][stbte__ui.drag_x+ox][i]) {
+                  stbte__undo_record_prop_float(tm, mapx, mapy, i, tm->props[mapy][mapx][i]);
+                  tm->props[mapy][mapx][i] = tm->props[stbte__ui.drag_y+oy][stbte__ui.drag_x+ox][i];
+               }
+            }
+         }
+      }
+   }
+   if (data) {
+      for (i=0; i < tm->num_layers; ++i) {
+         if (tm->data[mapy][mapx][i] != data[i]) {
+            stbte__undo_record(tm, mapx, mapy, i, tm->data[mapy][mapx][i]);
+            tm->data[mapy][mapx][i] = data[i];
+         }
+      }
+   }
+   #ifdef STBTE_ALLOW_LINK
+   if (copy_props) {
+      int overwritten=0, moved=0, copied=0;
+      // since this function is called on EVERY tile, we can fix up even tiles not
+      // involved in the move
+
+      stbte__link *k;
+      // first, determine what src link ends up here
+      k = &tm->link[mapy][mapx]; // by default, it's the one currently here
+      if (deleted)               // if dragged away, it's erased
+         k = NULL;
+      if (written)               // if dragged into, it gets that link
+         k = &tm->link[stbte__ui.drag_y+oy][stbte__ui.drag_x+ox];
+
+      // now check whether the *target* gets moved or overwritten
+      if (k && k->x >= 0) {
+         overwritten = stbte__in_rect(k->x, k->y, stbte__ui.drag_dest_x, stbte__ui.drag_dest_y, w, h);
+         if (!stbte__ui.shift)
+            moved    = stbte__in_rect(k->x, k->y, stbte__ui.drag_x     , stbte__ui.drag_y     , w, h);
+         else
+            copied   = stbte__in_rect(k->x, k->y, stbte__ui.drag_x     , stbte__ui.drag_y     , w, h);
+      }
+
+      if (deleted || written || overwritten || moved || copied) {
+         // choose the final link value based on the above
+         if (k == NULL || k->x < 0)
+            stbte__set_link(tm, mapx, mapy, -1, -1, STBTE__undo_record);
+         else if (moved || (copied && written)) {
+            // if we move the target, we update to point to the new target;
+            // or, if we copy the target and the source is part of the copy, then update to new target
+            int x = k->x + (stbte__ui.drag_dest_x - stbte__ui.drag_x);
+            int y = k->y + (stbte__ui.drag_dest_y - stbte__ui.drag_y);
+            if (!(x >= 0 && y >= 0 && x < tm->max_x && y < tm->max_y))
+               x = -1, y = -1;
+            stbte__set_link(tm, mapx, mapy, x, y, STBTE__undo_record);
+         } else if (overwritten) {
+            stbte__set_link(tm, mapx, mapy, -1, -1, STBTE__undo_record);
+         } else
+            stbte__set_link(tm, mapx, mapy, k->x, k->y, STBTE__undo_record);
+      }
+   }
+   #endif
+}
+
+static void stbte__drag_place(stbte_tilemap *tm, int mapx, int mapy)
+{
+   int i,j;
+   int copy_props = stbte__should_copy_properties(tm);
+   int move_x = (stbte__ui.drag_dest_x - stbte__ui.drag_x);
+   int move_y = (stbte__ui.drag_dest_y - stbte__ui.drag_y);
+   if (move_x == 0 && move_y == 0)
+      return;
+
+   stbte__begin_undo(tm);
+   // we now need a 2D memmove-style mover that doesn't
+   // overwrite any data as it goes. this requires being
+   // direction sensitive in the same way as memmove
+   if (move_y > 0 || (move_y == 0 && move_x > 0)) {
+      for (j=tm->max_y-1; j >= 0; --j)
+         for (i=tm->max_x-1; i >= 0; --i)
+            stbte__drag_update(tm,i,j,copy_props);
+   } else {
+      for (j=0; j < tm->max_y; ++j)
+         for (i=0; i < tm->max_x; ++i)
+            stbte__drag_update(tm,i,j,copy_props);
+   }
+   stbte__end_undo(tm);
+
+   stbte__ui.has_selection = 1;
+   stbte__ui.select_x0 = stbte__ui.drag_dest_x;
+   stbte__ui.select_y0 = stbte__ui.drag_dest_y;
+   stbte__ui.select_x1 = stbte__ui.select_x0 + stbte__ui.drag_w - 1;
+   stbte__ui.select_y1 = stbte__ui.select_y0 + stbte__ui.drag_h - 1;
+}
+
+static void stbte__tile_paint(stbte_tilemap *tm, int sx, int sy, int mapx, int mapy, int layer)
+{
+   int i;
+   int id = STBTE__IDMAP(mapx,mapy);
+   int x0=sx, y0=sy;
+   int x1=sx+tm->spacing_x, y1=sy+tm->spacing_y;
+   stbte__hittest(x0,y0,x1,y1, id);
+   short *data = tm->data[mapy][mapx];
+   short temp[STBTE_MAX_LAYERS];
+
+   if (STBTE__IS_MAP_HOT()) {
+      if (stbte__ui.pasting) {
+         int ox = mapx - stbte__ui.paste_x;
+         int oy = mapy - stbte__ui.paste_y;
+         if (ox >= 0 && ox < stbte__ui.copy_width && oy >= 0 && oy < stbte__ui.copy_height) {
+            stbte__paste_stack(tm, temp, tm->data[mapy][mapx], stbte__ui.copybuffer[oy*stbte__ui.copy_width+ox], 0);
+            data = temp;
+         }
+      } else if (stbte__ui.dragging) {
+         int ox,oy;
+         for (i=0; i < tm->num_layers; ++i)
+            temp[i] = tm->data[mapy][mapx][i];
+         data = temp;
+
+         // if it's in the source area, remove things unless shift-dragging
+         ox = mapx - stbte__ui.drag_x;
+         oy = mapy - stbte__ui.drag_y;
+         if (!stbte__ui.shift && ox >= 0 && ox < stbte__ui.drag_w && oy >= 0 && oy < stbte__ui.drag_h) {
+            stbte__clear_stack(tm, temp);
+         }
+
+         ox = mapx - stbte__ui.drag_dest_x;
+         oy = mapy - stbte__ui.drag_dest_y;
+         if (ox >= 0 && ox < stbte__ui.drag_w && oy >= 0 && oy < stbte__ui.drag_h) {
+            stbte__paste_stack(tm, temp, temp, tm->data[stbte__ui.drag_y+oy][stbte__ui.drag_x+ox], !stbte__ui.shift);
+         }
+      } else if (STBTE__IS_MAP_ACTIVE()) {
+         if (stbte__ui.tool == STBTE__tool_rect) {
+            if ((stbte__ui.ms_time & 511) < 380) {
+               int ex = ((stbte__ui.hot_id >> 19) & 4095);
+               int ey = ((stbte__ui.hot_id >>  7) & 4095);
+               int sx = stbte__ui.sx;
+               int sy = stbte__ui.sy;
+
+               if (   ((mapx >= sx && mapx < ex+1) || (mapx >= ex && mapx < sx+1))
+                   && ((mapy >= sy && mapy < ey+1) || (mapy >= ey && mapy < sy+1))) {
+                  int i;
+                  for (i=0; i < tm->num_layers; ++i)
+                     temp[i] = tm->data[mapy][mapx][i];
+                  data = temp;
+                  if (stbte__ui.active_event == STBTE__leftdown)
+                     stbte__brush_predict(tm, temp);
+                  else
+                     stbte__erase_predict(tm, temp, STBTE__erase_any);
+               }
+            }
+         }
+      }
+   }
+
+   if (STBTE__IS_HOT(id) && STBTE__INACTIVE() && !stbte__ui.pasting) {
+      if (stbte__ui.tool == STBTE__tool_brush) {
+         if ((stbte__ui.ms_time & 511) < 300) {
+            data = temp;
+            for (i=0; i < tm->num_layers; ++i)
+               temp[i] = tm->data[mapy][mapx][i];
+            stbte__brush_predict(tm, temp);
+         }
+      }
+   }
+
+   {
+      i = layer;
+      if (i == tm->solo_layer || (!tm->layerinfo[i].hidden && tm->solo_layer < 0))
+         if (data[i] >= 0)
+            STBTE_DRAW_TILE(sx,sy, (unsigned short) data[i], 0, tm->props[mapy][mapx]);
+   }
+}
+
+static void stbte__tile(stbte_tilemap *tm, int sx, int sy, int mapx, int mapy)
+{
+   int tool = stbte__ui.tool;
+   int x0=sx, y0=sy;
+   int x1=sx+tm->spacing_x, y1=sy+tm->spacing_y;
+   int id = STBTE__IDMAP(mapx,mapy);
+   int over = stbte__hittest(x0,y0,x1,y1, id);
+   switch (stbte__ui.event) {
+      case STBTE__paint: {
+         if (stbte__ui.pasting || stbte__ui.dragging || stbte__ui.scrolling)
+            break;
+         if (stbte__ui.scrollkey && !STBTE__IS_MAP_ACTIVE())
+            break;
+         if (STBTE__IS_HOT(id) && STBTE__IS_MAP_ACTIVE() && (tool == STBTE__tool_rect || tool == STBTE__tool_select)) {
+            int rx0,ry0,rx1,ry1,t;
+            // compute the center of each rect
+            rx0 = x0 + tm->spacing_x/2;
+            ry0 = y0 + tm->spacing_y/2;
+            rx1 = rx0 + (stbte__ui.sx - mapx) * tm->spacing_x;
+            ry1 = ry0 + (stbte__ui.sy - mapy) * tm->spacing_y;
+            if (rx0 > rx1) t=rx0,rx0=rx1,rx1=t;
+            if (ry0 > ry1) t=ry0,ry0=ry1,ry1=t;
+            rx0 -= tm->spacing_x/2;
+            ry0 -= tm->spacing_y/2;
+            rx1 += tm->spacing_x/2;
+            ry1 += tm->spacing_y/2;
+            stbte__draw_frame(rx0-1,ry0-1,rx1+1,ry1+1, STBTE_COLOR_TILEMAP_HIGHLIGHT);
+            break;
+         }
+         if (STBTE__IS_HOT(id) && STBTE__INACTIVE()) {
+            stbte__draw_frame(x0-1,y0-1,x1+1,y1+1, STBTE_COLOR_TILEMAP_HIGHLIGHT);
+         }
+#ifdef STBTE_ALLOW_LINK
+         if (stbte__ui.show_links && tm->link[mapy][mapx].x >= 0) {
+            int tx = tm->link[mapy][mapx].x;
+            int ty = tm->link[mapy][mapx].y;
+            int lx0,ly0,lx1,ly1;
+            if (STBTE_ALLOW_LINK(tm->data[mapy][mapx], tm->props[mapy][mapx],
+                                 tm->data[ty  ][tx  ], tm->props[ty  ][tx  ]))
+            {
+               lx0 =  x0 + (tm->spacing_x >> 1) - 1;
+               ly0 =  y0 + (tm->spacing_y >> 1) - 1;
+               lx1 = lx0 + (tx - mapx) * tm->spacing_x + 2;
+               ly1 = ly0 + (ty - mapy) * tm->spacing_y + 2;
+               stbte__draw_link(lx0,ly0,lx1,ly1,
+                   STBTE_LINK_COLOR(tm->data[mapy][mapx], tm->props[mapy][mapx],
+                                    tm->data[ty  ][tx  ], tm->props[ty  ][tx]));
+            }
+         }
+#endif
+         break;
+      }
+   }
+
+   if (stbte__ui.pasting) {
+      switch (stbte__ui.event) {
+         case STBTE__leftdown:
+            if (STBTE__IS_HOT(id)) {
+               stbte__ui.pasting = 0;
+               stbte__paste(tm, mapx, mapy);
+               stbte__activate(0);
+            }
+            break;
+         case STBTE__leftup:
+            // just clear it no matter what, since they might click away to clear it
+            stbte__activate(0);
+            break;
+         case STBTE__rightdown:
+            if (STBTE__IS_HOT(id)) {
+               stbte__activate(0);
+               stbte__ui.pasting = 0;
+            }
+            break;
+      }
+      return;
+   }
+
+   if (stbte__ui.scrolling) {
+      if (stbte__ui.event == STBTE__leftup) {
+         stbte__activate(0);
+         stbte__ui.scrolling = 0;
+      }
+      if (stbte__ui.event == STBTE__mousemove) {
+         tm->scroll_x += (stbte__ui.start_x - stbte__ui.mx);
+         tm->scroll_y += (stbte__ui.start_y - stbte__ui.my);
+         stbte__ui.start_x = stbte__ui.mx;
+         stbte__ui.start_y = stbte__ui.my;
+      }
+      return;
+   }
+
+   // regardless of tool, leftdown is a scrolldrag
+   if (STBTE__IS_HOT(id) && stbte__ui.scrollkey && stbte__ui.event == STBTE__leftdown) {
+      stbte__ui.scrolling = 1;
+      stbte__ui.start_x = stbte__ui.mx;
+      stbte__ui.start_y = stbte__ui.my;
+      return;
+   }
+
+   switch (tool) {
+      case STBTE__tool_brush:
+         switch (stbte__ui.event) {
+            case STBTE__mousemove:
+               if (STBTE__IS_MAP_ACTIVE() && over) {
+                  // don't brush/erase same tile multiple times unless they move away and back @TODO should just be only once, but that needs another data structure
+                  if (!STBTE__IS_ACTIVE(id)) {
+                     if (stbte__ui.active_event == STBTE__leftdown)
+                        stbte__brush(tm, mapx, mapy);
+                     else
+                        stbte__erase(tm, mapx, mapy, stbte__ui.brush_state);
+                     stbte__ui.active_id = id; // switch to this map square so we don't rebrush IT multiple times
+                  }
+               }
+               break;
+            case STBTE__leftdown:
+               if (STBTE__IS_HOT(id) && STBTE__INACTIVE()) {
+                  stbte__activate(id);
+                  stbte__begin_undo(tm);
+                  stbte__brush(tm, mapx, mapy);
+               }
+               break;
+            case STBTE__rightdown:
+               if (STBTE__IS_HOT(id) && STBTE__INACTIVE()) {
+                  stbte__activate(id);
+                  stbte__begin_undo(tm);
+                  if (stbte__erase(tm, mapx, mapy, STBTE__erase_any) == STBTE__erase_brushonly)
+                     stbte__ui.brush_state = STBTE__erase_brushonly;
+                  else
+                     stbte__ui.brush_state = STBTE__erase_any;
+               }
+               break;
+            case STBTE__leftup:
+            case STBTE__rightup:
+               if (STBTE__IS_MAP_ACTIVE()) {
+                  stbte__end_undo(tm);
+                  stbte__activate(0);
+               }
+               break;
+         }
+         break;
+
+#ifdef STBTE_ALLOW_LINK
+      case STBTE__tool_link:
+         switch (stbte__ui.event) {
+            case STBTE__leftdown:
+               if (STBTE__IS_HOT(id) && STBTE__INACTIVE()) {
+                  stbte__activate(id);
+                  stbte__ui.linking = 1;
+                  stbte__ui.sx = mapx;
+                  stbte__ui.sy = mapy;
+                  // @TODO: undo
+               }
+               break;
+            case STBTE__leftup:
+               if (STBTE__IS_HOT(id) && STBTE__IS_MAP_ACTIVE()) {
+                  if ((mapx != stbte__ui.sx || mapy != stbte__ui.sy) &&
+                         STBTE_ALLOW_LINK(tm->data[stbte__ui.sy][stbte__ui.sx], tm->props[stbte__ui.sy][stbte__ui.sx],
+                                          tm->data[mapy][mapx], tm->props[mapy][mapx]))
+                     stbte__set_link(tm, stbte__ui.sx, stbte__ui.sy, mapx, mapy, STBTE__undo_block);
+                  else
+                     stbte__set_link(tm, stbte__ui.sx, stbte__ui.sy, -1,-1, STBTE__undo_block);
+                  stbte__ui.linking = 0;
+                  stbte__activate(0);
+               }
+               break;
+
+            case STBTE__rightdown:
+               if (STBTE__IS_ACTIVE(id)) {
+                  stbte__activate(0);
+                  stbte__ui.linking = 0;
+               }
+               break;
+         }
+         break;
+#endif
+
+      case STBTE__tool_erase:
+         switch (stbte__ui.event) {
+            case STBTE__mousemove:
+               if (STBTE__IS_MAP_ACTIVE() && over)
+                  stbte__erase(tm, mapx, mapy, STBTE__erase_all);
+               break;
+            case STBTE__leftdown:
+               if (STBTE__IS_HOT(id) && STBTE__INACTIVE()) {
+                  stbte__activate(id);
+                  stbte__begin_undo(tm);
+                  stbte__erase(tm, mapx, mapy, STBTE__erase_all);
+               }
+               break;
+            case STBTE__leftup:
+               if (STBTE__IS_MAP_ACTIVE()) {
+                  stbte__end_undo(tm);
+                  stbte__activate(0);
+               }
+               break;
+         }
+         break;
+
+      case STBTE__tool_select:
+         if (STBTE__IS_HOT(id)) {
+            switch (stbte__ui.event) {
+               case STBTE__leftdown:
+                  if (STBTE__INACTIVE()) {
+                     // if we're clicking in an existing selection...
+                     if (stbte__ui.has_selection) {
+                        if (  mapx >= stbte__ui.select_x0 && mapx <= stbte__ui.select_x1
+                           && mapy >= stbte__ui.select_y0 && mapy <= stbte__ui.select_y1)
+                        {
+                           stbte__ui.dragging = 1;
+                           stbte__ui.drag_x = stbte__ui.select_x0;
+                           stbte__ui.drag_y = stbte__ui.select_y0;
+                           stbte__ui.drag_w = stbte__ui.select_x1 - stbte__ui.select_x0 + 1;
+                           stbte__ui.drag_h = stbte__ui.select_y1 - stbte__ui.select_y0 + 1;
+                           stbte__ui.drag_offx = mapx - stbte__ui.select_x0;
+                           stbte__ui.drag_offy = mapy - stbte__ui.select_y0;
+                        }
+                     }
+                     stbte__ui.has_selection = 0; // no selection until it completes
+                     stbte__activate_map(mapx,mapy);
+                  }
+                  break;
+               case STBTE__leftup:
+                  if (STBTE__IS_MAP_ACTIVE()) {
+                     if (stbte__ui.dragging) {
+                        stbte__drag_place(tm, mapx,mapy);
+                        stbte__ui.dragging = 0;
+                        stbte__activate(0);
+                     } else {
+                        stbte__select_rect(tm, stbte__ui.sx, stbte__ui.sy, mapx, mapy);
+                        stbte__activate(0);
+                     }
+                  }
+                  break;
+               case STBTE__rightdown:
+                  stbte__ui.has_selection = 0;
+                  break;
+            }
+         }
+         break;
+
+      case STBTE__tool_rect:
+         if (STBTE__IS_HOT(id)) {
+            switch (stbte__ui.event) {
+               case STBTE__leftdown:
+                  if (STBTE__INACTIVE())
+                     stbte__activate_map(mapx,mapy);
+                  break;
+               case STBTE__leftup:
+                  if (STBTE__IS_MAP_ACTIVE()) {
+                     stbte__fillrect(tm, stbte__ui.sx, stbte__ui.sy, mapx, mapy, 1);
+                     stbte__activate(0);
+                  }
+                  break;
+               case STBTE__rightdown:
+                  if (STBTE__INACTIVE())
+                     stbte__activate_map(mapx,mapy);
+                  break;
+               case STBTE__rightup:
+                  if (STBTE__IS_MAP_ACTIVE()) {
+                     stbte__fillrect(tm, stbte__ui.sx, stbte__ui.sy, mapx, mapy, 0);
+                     stbte__activate(0);
+                  }
+                  break;
+            }
+         }
+         break;
+
+
+      case STBTE__tool_eyedrop:
+         switch (stbte__ui.event) {
+            case STBTE__leftdown:
+               if (STBTE__IS_HOT(id) && STBTE__INACTIVE())
+                  stbte__eyedrop(tm,mapx,mapy);
+               break;
+         }
+         break;
+   }
+}
+
+static void stbte__start_paste(stbte_tilemap *tm)
+{
+   if (stbte__ui.has_copy) {
+      stbte__ui.pasting = 1;
+      stbte__activate(STBTE__ID(STBTE__toolbarB,3));
+   }
+}
+
+static void stbte__toolbar(stbte_tilemap *tm, int x0, int y0, int w, int h)
+{
+   int i;
+   int estimated_width = 13 * STBTE__num_tool + 8+8+ 120+4 - 30;
+   int x = x0 + w/2 - estimated_width/2;
+   int y = y0+1;
+
+   for (i=0; i < STBTE__num_tool; ++i) {
+      int highlight=0, disable=0;
+      highlight = (stbte__ui.tool == i);
+      if (i == STBTE__tool_undo || i == STBTE__tool_showgrid)
+          x += 8;
+      if (i == STBTE__tool_showgrid && stbte__ui.show_grid)
+         highlight = 1;
+      if (i == STBTE__tool_showlinks && stbte__ui.show_links)
+         highlight = 1;
+      if (i == STBTE__tool_fill)
+         continue;
+      #ifndef STBTE_ALLOW_LINK
+      if (i == STBTE__tool_link || i == STBTE__tool_showlinks)
+         disable = 1;
+      #endif
+      if (i == STBTE__tool_undo && !stbte__undo_available(tm))
+         disable = 1;
+      if (i == STBTE__tool_redo && !stbte__redo_available(tm))
+         disable = 1;
+      if (stbte__button_icon(STBTE__ctoolbar_button, toolchar[i], x, y, 13, STBTE__ID(STBTE__toolbarA, i), highlight, disable)) {
+         switch (i) {
+            case STBTE__tool_eyedrop:
+               stbte__ui.eyedrop_last_layer = tm->num_layers; // flush eyedropper state
+               // fallthrough
+            default:
+               stbte__ui.tool = i;
+               stbte__ui.has_selection = 0;
+               break;
+            case STBTE__tool_showlinks:
+               stbte__ui.show_links = !stbte__ui.show_links;
+               break;
+            case STBTE__tool_showgrid:
+               stbte__ui.show_grid = (stbte__ui.show_grid+1)%3;
+               break;
+            case STBTE__tool_undo:
+               stbte__undo(tm);
+               break;
+            case STBTE__tool_redo:
+               stbte__redo(tm);
+               break;
+         }
+      }
+      x += 13;
+   }
+
+   x += 8;
+   if (stbte__button(STBTE__ctoolbar_button, "cut"  , x, y,10, 40, STBTE__ID(STBTE__toolbarB,0), 0, !stbte__ui.has_selection))
+      stbte__copy_cut(tm, 1);
+   x += 42;
+   if (stbte__button(STBTE__ctoolbar_button, "copy" , x, y, 5, 40, STBTE__ID(STBTE__toolbarB,1), 0, !stbte__ui.has_selection))
+      stbte__copy_cut(tm, 0);
+   x += 42;
+   if (stbte__button(STBTE__ctoolbar_button, "paste", x, y, 0, 40, STBTE__ID(STBTE__toolbarB,2), stbte__ui.pasting, !stbte__ui.has_copy))
+      stbte__start_paste(tm);
+}
+
+#define STBTE__TEXTCOLOR(n)  stbte__color_table[n][STBTE__text][STBTE__idle]
+
+static int stbte__info_value(const char *label, int x, int y, int val, int digits, int id)
+{
+   if (stbte__ui.event == STBTE__paint) {
+      int off = 9-stbte__get_char_width(label[0]);
+      char text[16];
+      stbte__sprintf(text stbte__sizeof(text), label, digits, val);
+      stbte__draw_text_core(x+off,y, text, 999, STBTE__TEXTCOLOR(STBTE__cpanel),1);
+   }
+   if (id) {
+      x += 9+7*digits+4;
+      if (stbte__minibutton(STBTE__cmapsize, x,y, '+', STBTE__ID2(id,1,0)))
+         val += (stbte__ui.shift ? 10 : 1);
+      x += 9;
+      if (stbte__minibutton(STBTE__cmapsize, x,y, '-', STBTE__ID2(id,2,0)))
+         val -= (stbte__ui.shift ? 10 : 1);
+      if (val < 1) val = 1; else if (val > 4096) val = 4096;
+   }
+   return val;
+}
+
+static void stbte__info(stbte_tilemap *tm, int x0, int y0, int w, int h)
+{
+   int mode = stbte__ui.panel[STBTE__panel_info].mode;
+   int s = 11+7*tm->digits+4+15;
+   int x,y;
+   int in_region;
+
+   x = x0+2;
+   y = y0+2;
+   tm->max_x = stbte__info_value("w:%*d",x,y, tm->max_x, tm->digits, STBTE__ID(STBTE__info,0));
+   if (mode)
+      x += s;
+   else
+      y += 11;
+   tm->max_y = stbte__info_value("h:%*d",x,y, tm->max_y, tm->digits, STBTE__ID(STBTE__info,1));
+   x = x0+2;
+   y += 11;
+   in_region = (stbte__ui.hot_id & 127) == STBTE__map;
+   stbte__info_value(in_region ? "x:%*d" : "x:",x,y, (stbte__ui.hot_id>>19)&4095, tm->digits, 0);
+   if (mode)
+      x += s;
+   else
+      y += 11;
+   stbte__info_value(in_region ? "y:%*d" : "y:",x,y, (stbte__ui.hot_id>> 7)&4095, tm->digits, 0);
+   y += 15;
+   x = x0+2;
+   stbte__draw_text(x,y,"brush:",40,STBTE__TEXTCOLOR(STBTE__cpanel));
+   if (tm->cur_tile >= 0)
+      STBTE_DRAW_TILE(x+43,y-3,tm->tiles[tm->cur_tile].id,1,0);
+}
+
+static void stbte__layers(stbte_tilemap *tm, int x0, int y0, int w, int h)
+{
+   static const char *propmodes[3] = {
+      "default", "always", "never"
+   };
+   int num_rows;
+   int i, y, n;
+   int x1 = x0+w;
+   int y1 = y0+h;
+   int xoff = 20;
+
+   if (tm->has_layer_names) {
+      int side = stbte__ui.panel[STBTE__panel_layers].side;
+      xoff = stbte__region[side].width - 42;
+      xoff = (xoff < tm->layername_width + 10 ? xoff : tm->layername_width + 10);
+   }
+
+   x0 += 2;
+   y0 += 5;
+   if (!tm->has_layer_names) {
+      if (stbte__ui.event == STBTE__paint) {
+         stbte__draw_text(x0,y0, "Layers", w-4, STBTE__TEXTCOLOR(STBTE__cpanel));
+      }
+      y0 += 11;
+   }
+   num_rows = (y1-y0)/15;
+#ifndef STBTE_NO_PROPS
+   --num_rows;
+#endif
+   y = y0;
+   for (i=0; i < tm->num_layers; ++i) {
+      char text[3], *str = (char *) tm->layerinfo[i].name;
+      static char lockedchar[3] = { 'U', 'P', 'L' };
+      int locked = tm->layerinfo[i].locked;
+      int disabled = (tm->solo_layer >= 0 && tm->solo_layer != i);
+      if (i-tm->layer_scroll >= 0 && i-tm->layer_scroll < num_rows) {
+         if (str == NULL)
+            stbte__sprintf(str=text stbte__sizeof(text), "%2d", i+1);
+         if (stbte__button(STBTE__clayer_button, str, x0,y,(i+1<10)*2,xoff-2, STBTE__ID(STBTE__layer,i), tm->cur_layer==i,0))
+            tm->cur_layer = (tm->cur_layer == i ? -1 : i);
+         if (stbte__layerbutton(x0+xoff +  0,y+1,'H',STBTE__ID(STBTE__hide,i), tm->layerinfo[i].hidden,disabled,STBTE__clayer_hide))
+            tm->layerinfo[i].hidden = !tm->layerinfo[i].hidden;
+         if (stbte__layerbutton(x0+xoff + 12,y+1,lockedchar[locked],STBTE__ID(STBTE__lock,i), locked!=0,disabled,STBTE__clayer_lock))
+            tm->layerinfo[i].locked = (locked+1)%3;
+         if (stbte__layerbutton(x0+xoff + 24,y+1,'S',STBTE__ID(STBTE__solo,i), tm->solo_layer==i,0,STBTE__clayer_solo))
+            tm->solo_layer = (tm->solo_layer == i ? -1 : i);
+         y += 15;
+      }
+   }
+   stbte__scrollbar(x1-4, y0,y-2, &tm->layer_scroll, 0, tm->num_layers, num_rows, STBTE__ID(STBTE__scrollbar_id, STBTE__layer));
+#ifndef STBTE_NO_PROPS
+   n = stbte__text_width("prop:")+2;
+   stbte__draw_text(x0,y+2, "prop:", w, STBTE__TEXTCOLOR(STBTE__cpanel));
+   i = w - n - 4;
+   if (i > 50) i = 50;
+   if (stbte__button(STBTE__clayer_button, propmodes[tm->propmode], x0+n,y,0,i, STBTE__ID(STBTE__layer,256), 0,0))
+      tm->propmode = (tm->propmode+1)%3;
+#endif
+}
+
+static void stbte__categories(stbte_tilemap *tm, int x0, int y0, int w, int h)
+{
+   int s=11, x,y, i;
+   int num_rows = h / s;
+
+   w -= 4;
+   x = x0+2;
+   y = y0+4;
+   if (tm->category_scroll == 0) {
+      if (stbte__category_button("*ALL*", x,y, w, STBTE__ID(STBTE__categories, 65535), tm->cur_category == -1)) {
+         stbte__choose_category(tm, -1);
+      }
+      y += s;
+   }
+
+   for (i=0; i < tm->num_categories; ++i) {
+      if (i+1 - tm->category_scroll >= 0 && i+1 - tm->category_scroll < num_rows) {
+         if (y + 10 > y0+h)
+            return;
+         if (stbte__category_button(tm->categories[i], x,y,w, STBTE__ID(STBTE__categories,i), tm->cur_category == i))
+            stbte__choose_category(tm, i);
+         y += s;
+      }
+   }
+   stbte__scrollbar(x0+w, y0+4, y0+h-4, &tm->category_scroll, 0, tm->num_categories+1, num_rows, STBTE__ID(STBTE__scrollbar_id, STBTE__categories));
+}
+
+static void stbte__tile_in_palette(stbte_tilemap *tm, int x, int y, int slot)
+{
+   stbte__tileinfo *t = &tm->tiles[slot];
+   int x0=x, y0=y, x1 = x+tm->palette_spacing_x - 1, y1 = y+tm->palette_spacing_y;
+   int id = STBTE__ID(STBTE__palette, slot);
+   stbte__hittest(x0,y0,x1,y1, id);
+   switch (stbte__ui.event) {
+      case STBTE__paint:
+         stbte__draw_rect(x,y,x+tm->palette_spacing_x-1,y+tm->palette_spacing_x-1, STBTE_COLOR_TILEPALETTE_BACKGROUND);
+         STBTE_DRAW_TILE(x,y,id, slot == tm->cur_tile,0);
+         if (slot == tm->cur_tile)
+            stbte__draw_frame_delayed(x-1,y-1,x+tm->palette_spacing_x,y+tm->palette_spacing_y, STBTE_COLOR_TILEPALETTE_OUTLINE);
+         break;
+      default:
+         if (stbte__button_core(id))
+            tm->cur_tile = slot;
+         break;
+   }
+}
+
+static void stbte__palette_of_tiles(stbte_tilemap *tm, int x0, int y0, int w, int h)
+{
+   int i,x,y;
+   int num_vis_rows = (h-6) / tm->palette_spacing_y;
+   int num_columns = (w-2-6) / tm->palette_spacing_x;
+   int num_total_rows;
+   int column,row;
+   int x1 = x0+w, y1=y0+h;
+   x = x0+2;
+   y = y0+6;
+
+   if (num_columns == 0)
+      return;
+
+   num_total_rows = (tm->cur_palette_count + num_columns-1) / num_columns; // ceil()
+
+   column = 0;
+   row    = -tm->palette_scroll;
+   for (i=0; i < tm->num_tiles; ++i) {
+      stbte__tileinfo *t = &tm->tiles[i];
+
+      // filter based on category
+      if (tm->cur_category >= 0 && t->category_id != tm->cur_category)
+         continue;
+
+      // display it
+      if (row >= 0 && row < num_vis_rows) {
+         x = x0 + 2 + tm->palette_spacing_x * column;
+         y = y0 + 6 + tm->palette_spacing_y * row;
+         stbte__tile_in_palette(tm,x,y,i);
+      }
+
+      ++column;
+      if (column == num_columns) {
+         column = 0;
+         ++row;
+      }
+   }
+   stbte__flush_delay();
+   stbte__scrollbar(x1-4, y0+6, y1-2, &tm->palette_scroll, 0, num_total_rows, num_vis_rows, STBTE__ID(STBTE__scrollbar_id, STBTE__palette));
+}
+
+static float stbte__saved;
+static void stbte__props_panel(stbte_tilemap *tm, int x0, int y0, int w, int h)
+{
+   int x1 = x0+w;
+   int i;
+   int y = y0 + 5, x = x0+2;
+   int slider_width = 60;
+   int mx,my;
+   float *p;
+   short *data;
+   if (!stbte__is_single_selection())
+      return;
+   mx = stbte__ui.select_x0;
+   my = stbte__ui.select_y0;
+   p = tm->props[my][mx];
+   data = tm->data[my][mx];
+   STBTE__NOTUSED(data);
+   for (i=0; i < STBTE_MAX_PROPERTIES; ++i) {
+      unsigned int n = STBTE_PROP_TYPE(i, data, p);
+      if (n) {
+         char *s = (char*) STBTE_PROP_NAME(i, data, p);
+         if (s == NULL) s = (char*) "";
+         switch (n & 3) {
+            case STBTE_PROP_bool: {
+               int flag = (int) p[i];
+               if (stbte__layerbutton(x,y, flag ? 'x' : ' ', STBTE__ID(STBTE__prop_flag,i), flag, 0, 2)) {
+                  stbte__begin_undo(tm);
+                  stbte__undo_record_prop_float(tm,mx,my,i,(float) flag);
+                  p[i] = (float) !flag;
+                  stbte__end_undo(tm);
+               }
+               stbte__draw_text(x+13,y+1,s,x1-(x+13)-2,STBTE__TEXTCOLOR(STBTE__cpanel));
+               y += 13;
+               break;
+            }
+            case STBTE_PROP_int: {
+               int a = (int) STBTE_PROP_MIN(i,data,p);
+               int b = (int) STBTE_PROP_MAX(i,data,p);
+               int v = (int) p[i] - a;
+               if (a+v != p[i] || v < 0 || v > b-a) {
+                  if (v < 0) v = 0;
+                  if (v > b-a) v = b-a;
+                  p[i] = (float) (a+v); // @TODO undo
+               }
+               switch (stbte__slider(x, slider_width, y+7, b-a, &v, STBTE__ID(STBTE__prop_int,i)))
+               {
+                  case STBTE__begin:
+                     stbte__saved = p[i];
+                     // fallthrough
+                  case STBTE__change:
+                     p[i] = (float) (a+v); // @TODO undo
+                     break;
+                  case STBTE__end:
+                     if (p[i] != stbte__saved) {
+                        stbte__begin_undo(tm);
+                        stbte__undo_record_prop_float(tm,mx,my,i,stbte__saved);
+                        stbte__end_undo(tm);
+                     }
+                     break;
+               }
+               stbte__draw_text(x+slider_width+2,y+2, s, x1-1-(x+slider_width+2), STBTE__TEXTCOLOR(STBTE__cpanel));
+               y += 12;
+               break;
+            }
+            case STBTE_PROP_float: {
+               float a = (float) STBTE_PROP_MIN(i, data,p);
+               float b = (float) STBTE_PROP_MAX(i, data,p);
+               float c = STBTE_PROP_FLOAT_SCALE(i, data, p);
+               float old;
+               if (p[i] < a || p[i] > b) {
+                  // @TODO undo
+                  if (p[i] < a) p[i] = a;
+                  if (p[i] > b) p[i] = b;
+               }
+               old = p[i];
+               switch (stbte__float_control(x, y, 50, a, b, c, "%8.4f", &p[i], STBTE__layer,STBTE__ID(STBTE__prop_float,i))) {
+                  case STBTE__begin:
+                     stbte__saved = old;
+                     break;
+                  case STBTE__end:
+                     if (stbte__saved != p[i]) {
+                        stbte__begin_undo(tm);
+                        stbte__undo_record_prop_float(tm,mx,my,i, stbte__saved);
+                        stbte__end_undo(tm);
+                     }
+                     break;
+               }
+               stbte__draw_text(x+53,y+1, s, x1-1-(x+53), STBTE__TEXTCOLOR(STBTE__cpanel));
+               y += 12;
+               break;
+            }
+         }
+      }
+   }
+}
+
+static int stbte__cp_mode, stbte__cp_aspect, stbte__save, stbte__cp_altered;
+#ifdef STBTE__COLORPICKER
+static int stbte__cp_state, stbte__cp_index, stbte__color_copy;
+static void stbte__dump_colorstate(void)
+{
+   int i,j,k;
+   printf("static int stbte__color_table[STBTE__num_color_modes][STBTE__num_color_aspects][STBTE__num_color_states] =\n");
+   printf("{\n");
+   printf("   {\n");
+   for (k=0; k < STBTE__num_color_modes; ++k) {
+      for (j=0; j < STBTE__num_color_aspects; ++j) {
+         printf("      { ");
+         for (i=0; i < STBTE__num_color_states; ++i) {
+            printf("0x%06x, ", stbte__color_table[k][j][i]);
+         }
+         printf("},\n");
+      }
+      if (k+1 < STBTE__num_color_modes)
+         printf("   }, {\n");
+      else
+         printf("   },\n");
+   }
+   printf("};\n");
+}
+
+static void stbte__colorpicker(int x0, int y0, int w, int h)
+{
+   int x1 = x0+w, y1 = y0+h, x,y, i;
+
+   x =  x0+2; y = y0+6;
+
+   y += 5;
+   x += 8;
+
+
+   {
+      int color = stbte__color_table[stbte__cp_mode][stbte__cp_aspect][stbte__cp_index];
+      int rgb[3];
+      if (stbte__cp_altered && stbte__cp_index == STBTE__idle)
+         color = stbte__save;
+
+      if (stbte__minibutton(STBTE__cmapsize, x1-20,y+ 5, 'C', STBTE__ID2(STBTE__colorpick_id,4,0)))
+         stbte__color_copy = color;
+      if (stbte__minibutton(STBTE__cmapsize, x1-20,y+15, 'P', STBTE__ID2(STBTE__colorpick_id,4,1)))
+         color = stbte__color_copy;
+
+      rgb[0] = color >> 16; rgb[1] = (color>>8)&255; rgb[2] = color & 255;
+      for (i=0; i < 3; ++i) {
+         if (stbte__slider(x+8,64, y, 255, rgb+i, STBTE__ID2(STBTE__colorpick_id,3,i)) > 0)
+            stbte__dump_colorstate();
+         y += 15;
+      }
+      if (stbte__ui.event != STBTE__paint && stbte__ui.event != STBTE__tick)
+         stbte__color_table[stbte__cp_mode][stbte__cp_aspect][stbte__cp_index] = (rgb[0]<<16)|(rgb[1]<<8)|(rgb[2]);
+   }
+
+   y += 5;
+
+   // states
+   x = x0+2+35;
+   if (stbte__ui.event == STBTE__paint) {
+      static char *states[] = { "idle", "over", "down", "down&over", "selected", "selected&over", "disabled" };
+      stbte__draw_text(x, y+1, states[stbte__cp_index], x1-x-1, 0xffffff);
+   }
+
+   x = x0+24; y += 12;
+
+   for (i=3; i >= 0; --i) {
+      int state = 0 != (stbte__cp_state & (1 << i));
+      if (stbte__layerbutton(x,y, "OASD"[i], STBTE__ID2(STBTE__colorpick_id, 0,i), state,0, STBTE__clayer_button)) {
+         stbte__cp_state ^= (1 << i);
+         stbte__cp_index = stbte__state_to_index[0][0][0][stbte__cp_state];
+      }
+      x += 16;
+   }
+   x = x0+2; y += 18;
+
+   for (i=0; i < 3; ++i) {
+      static char *labels[] = { "Base", "Edge", "Text" };
+      if (stbte__button(STBTE__ctoolbar_button, labels[i], x,y,0,36, STBTE__ID2(STBTE__colorpick_id,1,i), stbte__cp_aspect==i,0))
+         stbte__cp_aspect = i;
+      x += 40;
+   }
+
+   y += 18;
+   x = x0+2;
+
+   for (i=0; i < STBTE__num_color_modes; ++i) {
+      if (stbte__button(STBTE__ctoolbar_button, stbte__color_names[i], x, y, 0,80, STBTE__ID2(STBTE__colorpick_id,2,i), stbte__cp_mode == i,0))
+         stbte__cp_mode = i;
+      y += 12;
+   }
+
+   // make the currently selected aspect flash, unless we're actively dragging color slider etc
+   if (stbte__ui.event == STBTE__tick) {
+      stbte__save = stbte__color_table[stbte__cp_mode][stbte__cp_aspect][STBTE__idle];
+      if ((stbte__ui.active_id & 127) != STBTE__colorpick_id) {
+         if ((stbte__ui.ms_time & 2047) < 200) {
+            stbte__color_table[stbte__cp_mode][stbte__cp_aspect][STBTE__idle] ^= 0x1f1f1f;
+            stbte__cp_altered = 1;
+         }
+      }
+   }
+}
+#endif
+
+static void stbte__editor_traverse(stbte_tilemap *tm)
+{
+   int i,j,i0,j0,i1,j1,n;
+
+   if (tm == NULL)
+      return;
+   if (stbte__ui.x0 == stbte__ui.x1 || stbte__ui.y0 == stbte__ui.y1)
+      return;
+
+   stbte__prepare_tileinfo(tm);
+
+   stbte__compute_panel_locations(tm); // @OPTIMIZE: we don't need to recompute this every time
+
+   if (stbte__ui.event == STBTE__paint) {
+      // fill screen with border
+      stbte__draw_rect(stbte__ui.x0, stbte__ui.y0, stbte__ui.x1, stbte__ui.y1, STBTE_COLOR_TILEMAP_BORDER);
+      // fill tilemap with tilemap background
+      stbte__draw_rect(stbte__ui.x0 - tm->scroll_x, stbte__ui.y0 - tm->scroll_y,
+                       stbte__ui.x0 - tm->scroll_x + tm->spacing_x * tm->max_x,
+                       stbte__ui.y0 - tm->scroll_y + tm->spacing_y * tm->max_y, STBTE_COLOR_TILEMAP_BACKGROUND);
+   }
+
+   // step 1: traverse all the tilemap data...
+
+   i0 = (tm->scroll_x - tm->spacing_x) / tm->spacing_x;
+   j0 = (tm->scroll_y - tm->spacing_y) / tm->spacing_y;
+   i1 = (tm->scroll_x + stbte__ui.x1 - stbte__ui.x0) / tm->spacing_x + 1;
+   j1 = (tm->scroll_y + stbte__ui.y1 - stbte__ui.y0) / tm->spacing_y + 1;
+
+   if (i0 < 0) i0 = 0;
+   if (j0 < 0) j0 = 0;
+   if (i1 > tm->max_x) i1 = tm->max_x;
+   if (j1 > tm->max_y) j1 = tm->max_y;
+
+   if (stbte__ui.event == STBTE__paint) {
+      // draw all of layer 0, then all of layer 1, etc, instead of old
+      // way which drew entire stack of each tile at once
+      for (n=0; n < tm->num_layers; ++n) {
+         for (j=j0; j < j1; ++j) {
+            for (i=i0; i < i1; ++i) {
+               int x = stbte__ui.x0 + i * tm->spacing_x - tm->scroll_x;
+               int y = stbte__ui.y0 + j * tm->spacing_y - tm->scroll_y;
+               stbte__tile_paint(tm, x, y, i, j, n);
+            }
+         }
+         if (n == 0 && stbte__ui.show_grid == 1) {
+            int x = stbte__ui.x0 + i0 * tm->spacing_x - tm->scroll_x;
+            int y = stbte__ui.y0 + j0 * tm->spacing_y - tm->scroll_y;
+            for (i=0; x < stbte__ui.x1 && i <= i1; ++i, x += tm->spacing_x)
+               stbte__draw_rect(x, stbte__ui.y0, x+1, stbte__ui.y1, STBTE_COLOR_GRID);
+            for (j=0; y < stbte__ui.y1 && j <= j1; ++j, y += tm->spacing_y)
+               stbte__draw_rect(stbte__ui.x0, y, stbte__ui.x1, y+1, STBTE_COLOR_GRID);
+         }
+      }
+   }
+
+   if (stbte__ui.event == STBTE__paint) {
+      // draw grid on top of everything except UI
+      if (stbte__ui.show_grid == 2) {
+         int x = stbte__ui.x0 + i0 * tm->spacing_x - tm->scroll_x;
+         int y = stbte__ui.y0 + j0 * tm->spacing_y - tm->scroll_y;
+         for (i=0; x < stbte__ui.x1 && i <= i1; ++i, x += tm->spacing_x)
+            stbte__draw_rect(x, stbte__ui.y0, x+1, stbte__ui.y1, STBTE_COLOR_GRID);
+         for (j=0; y < stbte__ui.y1 && j <= j1; ++j, y += tm->spacing_y)
+            stbte__draw_rect(stbte__ui.x0, y, stbte__ui.x1, y+1, STBTE_COLOR_GRID);
+      }
+   }
+
+   for (j=j0; j < j1; ++j) {
+      for (i=i0; i < i1; ++i) {
+         int x = stbte__ui.x0 + i * tm->spacing_x - tm->scroll_x;
+         int y = stbte__ui.y0 + j * tm->spacing_y - tm->scroll_y;
+         stbte__tile(tm, x, y, i, j);
+      }
+   }
+
+   if (stbte__ui.event == STBTE__paint) {
+      // draw the selection border
+      if (stbte__ui.has_selection) {
+         int x0,y0,x1,y1;
+         x0 = stbte__ui.x0 + (stbte__ui.select_x0    ) * tm->spacing_x - tm->scroll_x;
+         y0 = stbte__ui.y0 + (stbte__ui.select_y0    ) * tm->spacing_y - tm->scroll_y;
+         x1 = stbte__ui.x0 + (stbte__ui.select_x1 + 1) * tm->spacing_x - tm->scroll_x + 1;
+         y1 = stbte__ui.y0 + (stbte__ui.select_y1 + 1) * tm->spacing_y - tm->scroll_y + 1;
+         stbte__draw_frame(x0,y0,x1,y1, (stbte__ui.ms_time & 256 ? STBTE_COLOR_SELECTION_OUTLINE1 : STBTE_COLOR_SELECTION_OUTLINE2));
+      }
+
+      stbte__flush_delay(); // draw a dynamic link on top of the queued links
+
+      #ifdef STBTE_ALLOW_LINK
+      if (stbte__ui.linking && STBTE__IS_MAP_HOT()) {
+         int x0,y0,x1,y1;
+         int color;
+         int ex = ((stbte__ui.hot_id >> 19) & 4095);
+         int ey = ((stbte__ui.hot_id >>  7) & 4095);
+         x0 = stbte__ui.x0 + (stbte__ui.sx    ) * tm->spacing_x - tm->scroll_x + (tm->spacing_x>>1)+1;
+         y0 = stbte__ui.y0 + (stbte__ui.sy    ) * tm->spacing_y - tm->scroll_y + (tm->spacing_y>>1)+1;
+         x1 = stbte__ui.x0 + (ex              ) * tm->spacing_x - tm->scroll_x + (tm->spacing_x>>1)-1;
+         y1 = stbte__ui.y0 + (ey              ) * tm->spacing_y - tm->scroll_y + (tm->spacing_y>>1)-1;
+         if (STBTE_ALLOW_LINK(tm->data[stbte__ui.sy][stbte__ui.sx], tm->props[stbte__ui.sy][stbte__ui.sx], tm->data[ey][ex], tm->props[ey][ex]))
+            color = STBTE_LINK_COLOR_DRAWING;
+         else
+            color = STBTE_LINK_COLOR_DISALLOWED;
+         stbte__draw_link(x0,y0,x1,y1, color);
+      }
+      #endif
+   }
+   stbte__flush_delay();
+
+   // step 2: traverse the panels
+   for (i=0; i < STBTE__num_panel; ++i) {
+      stbte__panel *p = &stbte__ui.panel[i];
+      if (stbte__ui.event == STBTE__paint) {
+         stbte__draw_box(p->x0,p->y0,p->x0+p->width,p->y0+p->height, STBTE__cpanel, STBTE__idle);
+      }
+      // obscure tilemap data underneath panel
+      stbte__hittest(p->x0,p->y0,p->x0+p->width,p->y0+p->height, STBTE__ID2(STBTE__panel, i, 0));
+      switch (i) {
+         case STBTE__panel_toolbar:
+            if (stbte__ui.event == STBTE__paint)
+               stbte__draw_rect(p->x0,p->y0,p->x0+p->width,p->y0+p->height, stbte__color_table[STBTE__ctoolbar][STBTE__base][STBTE__idle]);
+            stbte__toolbar(tm,p->x0,p->y0,p->width,p->height);
+            break;
+         case STBTE__panel_info:
+            stbte__info(tm,p->x0,p->y0,p->width,p->height);
+            break;
+         case STBTE__panel_layers:
+            stbte__layers(tm,p->x0,p->y0,p->width,p->height);
+            break;
+         case STBTE__panel_categories:
+            stbte__categories(tm,p->x0,p->y0,p->width,p->height);
+            break;
+         case STBTE__panel_colorpick:
+#ifdef STBTE__COLORPICKER
+            stbte__colorpicker(p->x0,p->y0,p->width,p->height);
+#endif
+            break;
+         case STBTE__panel_tiles:
+            // erase boundary between categories and tiles if they're on same side
+            if (stbte__ui.event == STBTE__paint && p->side == stbte__ui.panel[STBTE__panel_categories].side)
+               stbte__draw_rect(p->x0+1,p->y0-1,p->x0+p->width-1,p->y0+1, stbte__color_table[STBTE__cpanel][STBTE__base][STBTE__idle]);
+            stbte__palette_of_tiles(tm,p->x0,p->y0,p->width,p->height);
+            break;
+         case STBTE__panel_props:
+            stbte__props_panel(tm,p->x0,p->y0,p->width,p->height);
+            break;
+      }
+      // draw the panel side selectors
+      for (j=0; j < 2; ++j) {
+         int result;
+         if (i == STBTE__panel_toolbar) continue;
+         result = stbte__microbutton(p->x0+p->width - 1 - 2*4 + 4*j,p->y0+2,3, STBTE__ID2(STBTE__panel, i, j+1), STBTE__cpanel_sider+j);
+         if (result) {
+            switch (j) {
+               case 0: p->side = result > 0 ? STBTE__side_left : STBTE__side_right; break;
+               case 1: p->delta_height += result; break;
+            }
+         }
+      }
+   }
+
+   if (stbte__ui.panel[STBTE__panel_categories].delta_height < -5) stbte__ui.panel[STBTE__panel_categories].delta_height = -5;
+   if (stbte__ui.panel[STBTE__panel_layers    ].delta_height < -5) stbte__ui.panel[STBTE__panel_layers    ].delta_height = -5;
+
+
+   // step 3: traverse the regions to place expander controls on them
+   for (i=0; i < 2; ++i) {
+      if (stbte__region[i].active) {
+         int x = stbte__region[i].x;
+         int width;
+         if (i == STBTE__side_left)
+            width =  stbte__ui.left_width , x += stbte__region[i].width + 1;
+         else
+            width = -stbte__ui.right_width, x -= 6;
+         if (stbte__microbutton_dragger(x, stbte__region[i].y+2, 5, STBTE__ID(STBTE__region,i), &width)) {
+            // if non-0, it is expanding, so retract it
+            if (stbte__region[i].retracted == 0.0)
+               stbte__region[i].retracted = 0.01f;
+            else
+               stbte__region[i].retracted = 0.0;
+         }
+         if (i == STBTE__side_left)
+            stbte__ui.left_width  =  width;
+         else
+            stbte__ui.right_width = -width;
+         if (stbte__ui.event == STBTE__tick) {
+            if (stbte__region[i].retracted && stbte__region[i].retracted < 1.0f) {
+               stbte__region[i].retracted += stbte__ui.dt*4;
+               if (stbte__region[i].retracted > 1)
+                  stbte__region[i].retracted = 1;
+            }
+         }
+      }
+   }
+
+   if (stbte__ui.event == STBTE__paint && stbte__ui.alert_msg) {
+      int w = stbte__text_width(stbte__ui.alert_msg);
+      int x = (stbte__ui.x0+stbte__ui.x1)/2;
+      int y = (stbte__ui.y0+stbte__ui.y1)*5/6;
+      stbte__draw_rect (x-w/2-4,y-8, x+w/2+4,y+8, 0x604020);
+      stbte__draw_frame(x-w/2-4,y-8, x+w/2+4,y+8, 0x906030);
+      stbte__draw_text (x-w/2,y-4, stbte__ui.alert_msg, w+1, 0xff8040);
+   }
+
+#ifdef STBTE_SHOW_CURSOR
+   if (stbte__ui.event == STBTE__paint)
+      stbte__draw_bitmap(stbte__ui.mx, stbte__ui.my, stbte__get_char_width(26), stbte__get_char_bitmap(26), 0xe0e0e0);
+#endif
+
+   if (stbte__ui.event == STBTE__tick && stbte__ui.alert_msg) {
+      stbte__ui.alert_timer -= stbte__ui.dt;
+      if (stbte__ui.alert_timer < 0) {
+         stbte__ui.alert_timer = 0;
+         stbte__ui.alert_msg = 0;
+      }
+   }
+
+   if (stbte__ui.event == STBTE__paint) {
+      stbte__color_table[stbte__cp_mode][stbte__cp_aspect][STBTE__idle] = stbte__save;
+      stbte__cp_altered = 0;
+   }
+}
+
+static void stbte__do_event(stbte_tilemap *tm)
+{
+   stbte__ui.next_hot_id = 0;
+   stbte__editor_traverse(tm);
+   stbte__ui.hot_id = stbte__ui.next_hot_id;
+
+   // automatically cancel on mouse-up in case the object that triggered it
+   // doesn't exist anymore
+   if (stbte__ui.active_id) {
+      if (stbte__ui.event == STBTE__leftup || stbte__ui.event == STBTE__rightup) {
+         if (!stbte__ui.pasting) {
+            stbte__activate(0);
+            if (stbte__ui.undoing)
+               stbte__end_undo(tm);
+            stbte__ui.scrolling = 0;
+            stbte__ui.dragging = 0;
+            stbte__ui.linking = 0;
+         }
+      }
+   }
+
+   // we could do this stuff in the widgets directly, but it would keep recomputing
+   // the same thing on every tile, which seems dumb.
+
+   if (stbte__ui.pasting) {
+      if (STBTE__IS_MAP_HOT()) {
+         // compute pasting location based on last hot
+         stbte__ui.paste_x = ((stbte__ui.hot_id >> 19) & 4095) - (stbte__ui.copy_width >> 1);
+         stbte__ui.paste_y = ((stbte__ui.hot_id >>  7) & 4095) - (stbte__ui.copy_height >> 1);
+      }
+   }
+   if (stbte__ui.dragging) {
+      if (STBTE__IS_MAP_HOT()) {
+         stbte__ui.drag_dest_x = ((stbte__ui.hot_id >> 19) & 4095) - stbte__ui.drag_offx;
+         stbte__ui.drag_dest_y = ((stbte__ui.hot_id >>  7) & 4095) - stbte__ui.drag_offy;
+      }
+   }
+}
+
+static void stbte__set_event(int event, int x, int y)
+{
+   stbte__ui.event = event;
+   stbte__ui.mx    = x;
+   stbte__ui.my    = y;
+   stbte__ui.dx    = x - stbte__ui.last_mouse_x;
+   stbte__ui.dy    = y - stbte__ui.last_mouse_y;
+   stbte__ui.last_mouse_x = x;
+   stbte__ui.last_mouse_y = y;
+   stbte__ui.accum_x += stbte__ui.dx;
+   stbte__ui.accum_y += stbte__ui.dy;
+}
+
+void stbte_draw(stbte_tilemap *tm)
+{
+   stbte__ui.event = STBTE__paint;
+   stbte__editor_traverse(tm);
+}
+
+void stbte_mouse_move(stbte_tilemap *tm, int x, int y, int shifted, int scrollkey)
+{
+   stbte__set_event(STBTE__mousemove, x,y);
+   stbte__ui.shift = shifted;
+   stbte__ui.scrollkey = scrollkey;
+   stbte__do_event(tm);
+}
+
+void stbte_mouse_button(stbte_tilemap *tm, int x, int y, int right, int down, int shifted, int scrollkey)
+{
+   static int events[2][2] = { { STBTE__leftup , STBTE__leftdown  },
+                               { STBTE__rightup, STBTE__rightdown } };
+   stbte__set_event(events[right][down], x,y);
+   stbte__ui.shift = shifted;
+   stbte__ui.scrollkey = scrollkey;
+
+   stbte__do_event(tm);
+}
+
+void stbte_mouse_wheel(stbte_tilemap *tm, int x, int y, int vscroll)
+{
+   // not implemented yet -- need different way of hittesting
+}
+
+void stbte_action(stbte_tilemap *tm, enum stbte_action act)
+{
+   switch (act) {
+      case STBTE_tool_select:      stbte__ui.tool = STBTE__tool_select;               break;
+      case STBTE_tool_brush:       stbte__ui.tool = STBTE__tool_brush;                break;
+      case STBTE_tool_erase:       stbte__ui.tool = STBTE__tool_erase;                break;
+      case STBTE_tool_rectangle:   stbte__ui.tool = STBTE__tool_rect;                 break;
+      case STBTE_tool_eyedropper:  stbte__ui.tool = STBTE__tool_eyedrop;              break;
+      case STBTE_tool_link:        stbte__ui.tool = STBTE__tool_link;                 break;
+      case STBTE_act_toggle_grid:  stbte__ui.show_grid = (stbte__ui.show_grid+1) % 3; break;
+      case STBTE_act_toggle_links: stbte__ui.show_links ^= 1;                         break;
+      case STBTE_act_undo:         stbte__undo(tm);                                   break;
+      case STBTE_act_redo:         stbte__redo(tm);                                   break;
+      case STBTE_act_cut:          stbte__copy_cut(tm, 1);                            break;
+      case STBTE_act_copy:         stbte__copy_cut(tm, 0);                            break;
+      case STBTE_act_paste:        stbte__start_paste(tm);                            break;
+      case STBTE_scroll_left:      tm->scroll_x -= tm->spacing_x;                     break;
+      case STBTE_scroll_right:     tm->scroll_x += tm->spacing_x;                     break;
+      case STBTE_scroll_up:        tm->scroll_y -= tm->spacing_y;                     break;
+      case STBTE_scroll_down:      tm->scroll_y += tm->spacing_y;                     break;
+   }
+}
+
+void stbte_tick(stbte_tilemap *tm, float dt)
+{
+   stbte__ui.event = STBTE__tick;
+   stbte__ui.dt    = dt;
+   stbte__do_event(tm);
+   stbte__ui.ms_time += (int) (dt * 1024) + 1; // make sure if time is superfast it always updates a little
+}
+
+void stbte_mouse_sdl(stbte_tilemap *tm, const void *sdl_event, float xs, float ys, int xo, int yo)
+{
+#ifdef _SDL_H
+   SDL_Event *event = (SDL_Event *) sdl_event;
+   SDL_Keymod km = SDL_GetModState();
+   int shift = (km & KMOD_LCTRL) || (km & KMOD_RCTRL);
+   int scrollkey = 0 != SDL_GetKeyboardState(NULL)[SDL_SCANCODE_SPACE];
+   switch (event->type) {
+      case SDL_MOUSEMOTION:
+         stbte_mouse_move(tm, (int) (xs*event->motion.x+xo), (int) (ys*event->motion.y+yo), shift, scrollkey);
+         break;
+      case SDL_MOUSEBUTTONUP:
+         stbte_mouse_button(tm, (int) (xs*event->button.x+xo), (int) (ys*event->button.y+yo), event->button.button != SDL_BUTTON_LEFT, 0, shift, scrollkey);
+         break;
+      case SDL_MOUSEBUTTONDOWN:
+         stbte_mouse_button(tm, (int) (xs*event->button.x+xo), (int) (ys*event->button.y+yo), event->button.button != SDL_BUTTON_LEFT, 1, shift, scrollkey);
+         break;
+      case SDL_MOUSEWHEEL:
+         stbte_mouse_wheel(tm, stbte__ui.mx, stbte__ui.my, event->wheel.y);
+         break;
+   }
+#else
+   STBTE__NOTUSED(tm);
+   STBTE__NOTUSED(sdl_event);
+   STBTE__NOTUSED(xs);
+   STBTE__NOTUSED(ys);
+   STBTE__NOTUSED(xo);
+   STBTE__NOTUSED(yo);
+#endif
+}
+
+#endif // STB_TILEMAP_EDITOR_IMPLEMENTATION
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
--- /dev/null
+++ b/include-demo/stb_truetype.h
@@ -1,0 +1,5077 @@
+// stb_truetype.h - v1.26 - public domain
+// authored from 2009-2021 by Sean Barrett / RAD Game Tools
+//
+// =======================================================================
+//
+//    NO SECURITY GUARANTEE -- DO NOT USE THIS ON UNTRUSTED FONT FILES
+//
+// This library does no range checking of the offsets found in the file,
+// meaning an attacker can use it to read arbitrary memory.
+//
+// =======================================================================
+//
+//   This library processes TrueType files:
+//        parse files
+//        extract glyph metrics
+//        extract glyph shapes
+//        render glyphs to one-channel bitmaps with antialiasing (box filter)
+//        render glyphs to one-channel SDF bitmaps (signed-distance field/function)
+//
+//   Todo:
+//        non-MS cmaps
+//        crashproof on bad data
+//        hinting? (no longer patented)
+//        cleartype-style AA?
+//        optimize: use simple memory allocator for intermediates
+//        optimize: build edge-list directly from curves
+//        optimize: rasterize directly from curves?
+//
+// ADDITIONAL CONTRIBUTORS
+//
+//   Mikko Mononen: compound shape support, more cmap formats
+//   Tor Andersson: kerning, subpixel rendering
+//   Dougall Johnson: OpenType / Type 2 font handling
+//   Daniel Ribeiro Maciel: basic GPOS-based kerning
+//
+//   Misc other:
+//       Ryan Gordon
+//       Simon Glass
+//       github:IntellectualKitty
+//       Imanol Celaya
+//       Daniel Ribeiro Maciel
+//
+//   Bug/warning reports/fixes:
+//       "Zer" on mollyrocket       Fabian "ryg" Giesen   github:NiLuJe
+//       Cass Everitt               Martins Mozeiko       github:aloucks
+//       stoiko (Haemimont Games)   Cap Petschulat        github:oyvindjam
+//       Brian Hook                 Omar Cornut           github:vassvik
+//       Walter van Niftrik         Ryan Griege
+//       David Gow                  Peter LaValle
+//       David Given                Sergey Popov
+//       Ivan-Assen Ivanov          Giumo X. Clanjor
+//       Anthony Pesch              Higor Euripedes
+//       Johan Duparc               Thomas Fields
+//       Hou Qiming                 Derek Vinyard
+//       Rob Loach                  Cort Stratton
+//       Kenney Phillis Jr.         Brian Costabile
+//       Ken Voskuil (kaesve)
+//
+// VERSION HISTORY
+//
+//   1.26 (2021-08-28) fix broken rasterizer
+//   1.25 (2021-07-11) many fixes
+//   1.24 (2020-02-05) fix warning
+//   1.23 (2020-02-02) query SVG data for glyphs; query whole kerning table (but only kern not GPOS)
+//   1.22 (2019-08-11) minimize missing-glyph duplication; fix kerning if both 'GPOS' and 'kern' are defined
+//   1.21 (2019-02-25) fix warning
+//   1.20 (2019-02-07) PackFontRange skips missing codepoints; GetScaleFontVMetrics()
+//   1.19 (2018-02-11) GPOS kerning, STBTT_fmod
+//   1.18 (2018-01-29) add missing function
+//   1.17 (2017-07-23) make more arguments const; doc fix
+//   1.16 (2017-07-12) SDF support
+//   1.15 (2017-03-03) make more arguments const
+//   1.14 (2017-01-16) num-fonts-in-TTC function
+//   1.13 (2017-01-02) support OpenType fonts, certain Apple fonts
+//   1.12 (2016-10-25) suppress warnings about casting away const with -Wcast-qual
+//   1.11 (2016-04-02) fix unused-variable warning
+//   1.10 (2016-04-02) user-defined fabs(); rare memory leak; remove duplicate typedef
+//   1.09 (2016-01-16) warning fix; avoid crash on outofmem; use allocation userdata properly
+//   1.08 (2015-09-13) document stbtt_Rasterize(); fixes for vertical & horizontal edges
+//   1.07 (2015-08-01) allow PackFontRanges to accept arrays of sparse codepoints;
+//                     variant PackFontRanges to pack and render in separate phases;
+//                     fix stbtt_GetFontOFfsetForIndex (never worked for non-0 input?);
+//                     fixed an assert() bug in the new rasterizer
+//                     replace assert() with STBTT_assert() in new rasterizer
+//
+//   Full history can be found at the end of this file.
+//
+// LICENSE
+//
+//   See end of file for license information.
+//
+// USAGE
+//
+//   Include this file in whatever places need to refer to it. In ONE C/C++
+//   file, write:
+//      #define STB_TRUETYPE_IMPLEMENTATION
+//   before the #include of this file. This expands out the actual
+//   implementation into that C/C++ file.
+//
+//   To make the implementation private to the file that generates the implementation,
+//      #define STBTT_STATIC
+//
+//   Simple 3D API (don't ship this, but it's fine for tools and quick start)
+//           stbtt_BakeFontBitmap()               -- bake a font to a bitmap for use as texture
+//           stbtt_GetBakedQuad()                 -- compute quad to draw for a given char
+//
+//   Improved 3D API (more shippable):
+//           #include "stb_rect_pack.h"           -- optional, but you really want it
+//           stbtt_PackBegin()
+//           stbtt_PackSetOversampling()          -- for improved quality on small fonts
+//           stbtt_PackFontRanges()               -- pack and renders
+//           stbtt_PackEnd()
+//           stbtt_GetPackedQuad()
+//
+//   "Load" a font file from a memory buffer (you have to keep the buffer loaded)
+//           stbtt_InitFont()
+//           stbtt_GetFontOffsetForIndex()        -- indexing for TTC font collections
+//           stbtt_GetNumberOfFonts()             -- number of fonts for TTC font collections
+//
+//   Render a unicode codepoint to a bitmap
+//           stbtt_GetCodepointBitmap()           -- allocates and returns a bitmap
+//           stbtt_MakeCodepointBitmap()          -- renders into bitmap you provide
+//           stbtt_GetCodepointBitmapBox()        -- how big the bitmap must be
+//
+//   Character advance/positioning
+//           stbtt_GetCodepointHMetrics()
+//           stbtt_GetFontVMetrics()
+//           stbtt_GetFontVMetricsOS2()
+//           stbtt_GetCodepointKernAdvance()
+//
+//   Starting with version 1.06, the rasterizer was replaced with a new,
+//   faster and generally-more-precise rasterizer. The new rasterizer more
+//   accurately measures pixel coverage for anti-aliasing, except in the case
+//   where multiple shapes overlap, in which case it overestimates the AA pixel
+//   coverage. Thus, anti-aliasing of intersecting shapes may look wrong. If
+//   this turns out to be a problem, you can re-enable the old rasterizer with
+//        #define STBTT_RASTERIZER_VERSION 1
+//   which will incur about a 15% speed hit.
+//
+// ADDITIONAL DOCUMENTATION
+//
+//   Immediately after this block comment are a series of sample programs.
+//
+//   After the sample programs is the "header file" section. This section
+//   includes documentation for each API function.
+//
+//   Some important concepts to understand to use this library:
+//
+//      Codepoint
+//         Characters are defined by unicode codepoints, e.g. 65 is
+//         uppercase A, 231 is lowercase c with a cedilla, 0x7e30 is
+//         the hiragana for "ma".
+//
+//      Glyph
+//         A visual character shape (every codepoint is rendered as
+//         some glyph)
+//
+//      Glyph index
+//         A font-specific integer ID representing a glyph
+//
+//      Baseline
+//         Glyph shapes are defined relative to a baseline, which is the
+//         bottom of uppercase characters. Characters extend both above
+//         and below the baseline.
+//
+//      Current Point
+//         As you draw text to the screen, you keep track of a "current point"
+//         which is the origin of each character. The current point's vertical
+//         position is the baseline. Even "baked fonts" use this model.
+//
+//      Vertical Font Metrics
+//         The vertical qualities of the font, used to vertically position
+//         and space the characters. See docs for stbtt_GetFontVMetrics.
+//
+//      Font Size in Pixels or Points
+//         The preferred interface for specifying font sizes in stb_truetype
+//         is to specify how tall the font's vertical extent should be in pixels.
+//         If that sounds good enough, skip the next paragraph.
+//
+//         Most font APIs instead use "points", which are a common typographic
+//         measurement for describing font size, defined as 72 points per inch.
+//         stb_truetype provides a point API for compatibility. However, true
+//         "per inch" conventions don't make much sense on computer displays
+//         since different monitors have different number of pixels per
+//         inch. For example, Windows traditionally uses a convention that
+//         there are 96 pixels per inch, thus making 'inch' measurements have
+//         nothing to do with inches, and thus effectively defining a point to
+//         be 1.333 pixels. Additionally, the TrueType font data provides
+//         an explicit scale factor to scale a given font's glyphs to points,
+//         but the author has observed that this scale factor is often wrong
+//         for non-commercial fonts, thus making fonts scaled in points
+//         according to the TrueType spec incoherently sized in practice.
+//
+// DETAILED USAGE:
+//
+//  Scale:
+//    Select how high you want the font to be, in points or pixels.
+//    Call ScaleForPixelHeight or ScaleForMappingEmToPixels to compute
+//    a scale factor SF that will be used by all other functions.
+//
+//  Baseline:
+//    You need to select a y-coordinate that is the baseline of where
+//    your text will appear. Call GetFontBoundingBox to get the baseline-relative
+//    bounding box for all characters. SF*-y0 will be the distance in pixels
+//    that the worst-case character could extend above the baseline, so if
+//    you want the top edge of characters to appear at the top of the
+//    screen where y=0, then you would set the baseline to SF*-y0.
+//
+//  Current point:
+//    Set the current point where the first character will appear. The
+//    first character could extend left of the current point; this is font
+//    dependent. You can either choose a current point that is the leftmost
+//    point and hope, or add some padding, or check the bounding box or
+//    left-side-bearing of the first character to be displayed and set
+//    the current point based on that.
+//
+//  Displaying a character:
+//    Compute the bounding box of the character. It will contain signed values
+//    relative to <current_point, baseline>. I.e. if it returns x0,y0,x1,y1,
+//    then the character should be displayed in the rectangle from
+//    <current_point+SF*x0, baseline+SF*y0> to <current_point+SF*x1,baseline+SF*y1).
+//
+//  Advancing for the next character:
+//    Call GlyphHMetrics, and compute 'current_point += SF * advance'.
+//
+//
+// ADVANCED USAGE
+//
+//   Quality:
+//
+//    - Use the functions with Subpixel at the end to allow your characters
+//      to have subpixel positioning. Since the font is anti-aliased, not
+//      hinted, this is very import for quality. (This is not possible with
+//      baked fonts.)
+//
+//    - Kerning is now supported, and if you're supporting subpixel rendering
+//      then kerning is worth using to give your text a polished look.
+//
+//   Performance:
+//
+//    - Convert Unicode codepoints to glyph indexes and operate on the glyphs;
+//      if you don't do this, stb_truetype is forced to do the conversion on
+//      every call.
+//
+//    - There are a lot of memory allocations. We should modify it to take
+//      a temp buffer and allocate from the temp buffer (without freeing),
+//      should help performance a lot.
+//
+// NOTES
+//
+//   The system uses the raw data found in the .ttf file without changing it
+//   and without building auxiliary data structures. This is a bit inefficient
+//   on little-endian systems (the data is big-endian), but assuming you're
+//   caching the bitmaps or glyph shapes this shouldn't be a big deal.
+//
+//   It appears to be very hard to programmatically determine what font a
+//   given file is in a general way. I provide an API for this, but I don't
+//   recommend it.
+//
+//
+// PERFORMANCE MEASUREMENTS FOR 1.06:
+//
+//                      32-bit     64-bit
+//   Previous release:  8.83 s     7.68 s
+//   Pool allocations:  7.72 s     6.34 s
+//   Inline sort     :  6.54 s     5.65 s
+//   New rasterizer  :  5.63 s     5.00 s
+
+//////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////
+////
+////  SAMPLE PROGRAMS
+////
+//
+//  Incomplete text-in-3d-api example, which draws quads properly aligned to be lossless.
+//  See "tests/truetype_demo_win32.c" for a complete version.
+#if 0
+#define STB_TRUETYPE_IMPLEMENTATION  // force following include to generate implementation
+#include "stb_truetype.h"
+
+unsigned char ttf_buffer[1<<20];
+unsigned char temp_bitmap[512*512];
+
+stbtt_bakedchar cdata[96]; // ASCII 32..126 is 95 glyphs
+GLuint ftex;
+
+void my_stbtt_initfont(void)
+{
+   fread(ttf_buffer, 1, 1<<20, fopen("c:/windows/fonts/times.ttf", "rb"));
+   stbtt_BakeFontBitmap(ttf_buffer,0, 32.0, temp_bitmap,512,512, 32,96, cdata); // no guarantee this fits!
+   // can free ttf_buffer at this point
+   glGenTextures(1, &ftex);
+   glBindTexture(GL_TEXTURE_2D, ftex);
+   glTexImage2D(GL_TEXTURE_2D, 0, GL_ALPHA, 512,512, 0, GL_ALPHA, GL_UNSIGNED_BYTE, temp_bitmap);
+   // can free temp_bitmap at this point
+   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+}
+
+void my_stbtt_print(float x, float y, char *text)
+{
+   // assume orthographic projection with units = screen pixels, origin at top left
+   glEnable(GL_BLEND);
+   glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
+   glEnable(GL_TEXTURE_2D);
+   glBindTexture(GL_TEXTURE_2D, ftex);
+   glBegin(GL_QUADS);
+   while (*text) {
+      if (*text >= 32 && *text < 128) {
+         stbtt_aligned_quad q;
+         stbtt_GetBakedQuad(cdata, 512,512, *text-32, &x,&y,&q,1);//1=opengl & d3d10+,0=d3d9
+         glTexCoord2f(q.s0,q.t0); glVertex2f(q.x0,q.y0);
+         glTexCoord2f(q.s1,q.t0); glVertex2f(q.x1,q.y0);
+         glTexCoord2f(q.s1,q.t1); glVertex2f(q.x1,q.y1);
+         glTexCoord2f(q.s0,q.t1); glVertex2f(q.x0,q.y1);
+      }
+      ++text;
+   }
+   glEnd();
+}
+#endif
+//
+//
+//////////////////////////////////////////////////////////////////////////////
+//
+// Complete program (this compiles): get a single bitmap, print as ASCII art
+//
+#if 0
+#include <stdio.h>
+#define STB_TRUETYPE_IMPLEMENTATION  // force following include to generate implementation
+#include "stb_truetype.h"
+
+char ttf_buffer[1<<25];
+
+int main(int argc, char **argv)
+{
+   stbtt_fontinfo font;
+   unsigned char *bitmap;
+   int w,h,i,j,c = (argc > 1 ? atoi(argv[1]) : 'a'), s = (argc > 2 ? atoi(argv[2]) : 20);
+
+   fread(ttf_buffer, 1, 1<<25, fopen(argc > 3 ? argv[3] : "c:/windows/fonts/arialbd.ttf", "rb"));
+
+   stbtt_InitFont(&font, ttf_buffer, stbtt_GetFontOffsetForIndex(ttf_buffer,0));
+   bitmap = stbtt_GetCodepointBitmap(&font, 0,stbtt_ScaleForPixelHeight(&font, s), c, &w, &h, 0,0);
+
+   for (j=0; j < h; ++j) {
+      for (i=0; i < w; ++i)
+         putchar(" .:ioVM@"[bitmap[j*w+i]>>5]);
+      putchar('\n');
+   }
+   return 0;
+}
+#endif
+//
+// Output:
+//
+//     .ii.
+//    @@@@@@.
+//   V@Mio@@o
+//   :i.  V@V
+//     :oM@@M
+//   :@@@MM@M
+//   @@o  o@M
+//  :@@.  M@M
+//   @@@o@@@@
+//   :M@@V:@@.
+//
+//////////////////////////////////////////////////////////////////////////////
+//
+// Complete program: print "Hello World!" banner, with bugs
+//
+#if 0
+char buffer[24<<20];
+unsigned char screen[20][79];
+
+int main(int arg, char **argv)
+{
+   stbtt_fontinfo font;
+   int i,j,ascent,baseline,ch=0;
+   float scale, xpos=2; // leave a little padding in case the character extends left
+   char *text = "Heljo World!"; // intentionally misspelled to show 'lj' brokenness
+
+   fread(buffer, 1, 1000000, fopen("c:/windows/fonts/arialbd.ttf", "rb"));
+   stbtt_InitFont(&font, buffer, 0);
+
+   scale = stbtt_ScaleForPixelHeight(&font, 15);
+   stbtt_GetFontVMetrics(&font, &ascent,0,0);
+   baseline = (int) (ascent*scale);
+
+   while (text[ch]) {
+      int advance,lsb,x0,y0,x1,y1;
+      float x_shift = xpos - (float) floor(xpos);
+      stbtt_GetCodepointHMetrics(&font, text[ch], &advance, &lsb);
+      stbtt_GetCodepointBitmapBoxSubpixel(&font, text[ch], scale,scale,x_shift,0, &x0,&y0,&x1,&y1);
+      stbtt_MakeCodepointBitmapSubpixel(&font, &screen[baseline + y0][(int) xpos + x0], x1-x0,y1-y0, 79, scale,scale,x_shift,0, text[ch]);
+      // note that this stomps the old data, so where character boxes overlap (e.g. 'lj') it's wrong
+      // because this API is really for baking character bitmaps into textures. if you want to render
+      // a sequence of characters, you really need to render each bitmap to a temp buffer, then
+      // "alpha blend" that into the working buffer
+      xpos += (advance * scale);
+      if (text[ch+1])
+         xpos += scale*stbtt_GetCodepointKernAdvance(&font, text[ch],text[ch+1]);
+      ++ch;
+   }
+
+   for (j=0; j < 20; ++j) {
+      for (i=0; i < 78; ++i)
+         putchar(" .:ioVM@"[screen[j][i]>>5]);
+      putchar('\n');
+   }
+
+   return 0;
+}
+#endif
+
+
+//////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////
+////
+////   INTEGRATION WITH YOUR CODEBASE
+////
+////   The following sections allow you to supply alternate definitions
+////   of C library functions used by stb_truetype, e.g. if you don't
+////   link with the C runtime library.
+
+#ifdef STB_TRUETYPE_IMPLEMENTATION
+   // #define your own (u)stbtt_int8/16/32 before including to override this
+   #ifndef stbtt_uint8
+   typedef unsigned char   stbtt_uint8;
+   typedef signed   char   stbtt_int8;
+   typedef unsigned short  stbtt_uint16;
+   typedef signed   short  stbtt_int16;
+   typedef unsigned int    stbtt_uint32;
+   typedef signed   int    stbtt_int32;
+   #endif
+
+   typedef char stbtt__check_size32[sizeof(stbtt_int32)==4 ? 1 : -1];
+   typedef char stbtt__check_size16[sizeof(stbtt_int16)==2 ? 1 : -1];
+
+   // e.g. #define your own STBTT_ifloor/STBTT_iceil() to avoid math.h
+   #ifndef STBTT_ifloor
+   #include <math.h>
+   #define STBTT_ifloor(x)   ((int) floor(x))
+   #define STBTT_iceil(x)    ((int) ceil(x))
+   #endif
+
+   #ifndef STBTT_sqrt
+   #include <math.h>
+   #define STBTT_sqrt(x)      sqrt(x)
+   #define STBTT_pow(x,y)     pow(x,y)
+   #endif
+
+   #ifndef STBTT_fmod
+   #include <math.h>
+   #define STBTT_fmod(x,y)    fmod(x,y)
+   #endif
+
+   #ifndef STBTT_cos
+   #include <math.h>
+   #define STBTT_cos(x)       cos(x)
+   #define STBTT_acos(x)      acos(x)
+   #endif
+
+   #ifndef STBTT_fabs
+   #include <math.h>
+   #define STBTT_fabs(x)      fabs(x)
+   #endif
+
+   // #define your own functions "STBTT_malloc" / "STBTT_free" to avoid malloc.h
+   #ifndef STBTT_malloc
+   #include <stdlib.h>
+   #define STBTT_malloc(x,u)  ((void)(u),malloc(x))
+   #define STBTT_free(x,u)    ((void)(u),free(x))
+   #endif
+
+   #ifndef STBTT_assert
+   #include <assert.h>
+   #define STBTT_assert(x)    assert(x)
+   #endif
+
+   #ifndef STBTT_strlen
+   #include <string.h>
+   #define STBTT_strlen(x)    strlen(x)
+   #endif
+
+   #ifndef STBTT_memcpy
+   #include <string.h>
+   #define STBTT_memcpy       memcpy
+   #define STBTT_memset       memset
+   #endif
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+////
+////   INTERFACE
+////
+////
+
+#ifndef __STB_INCLUDE_STB_TRUETYPE_H__
+#define __STB_INCLUDE_STB_TRUETYPE_H__
+
+#ifdef STBTT_STATIC
+#define STBTT_DEF static
+#else
+#define STBTT_DEF extern
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// private structure
+typedef struct
+{
+   unsigned char *data;
+   int cursor;
+   int size;
+} stbtt__buf;
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// TEXTURE BAKING API
+//
+// If you use this API, you only have to call two functions ever.
+//
+
+typedef struct
+{
+   unsigned short x0,y0,x1,y1; // coordinates of bbox in bitmap
+   float xoff,yoff,xadvance;
+} stbtt_bakedchar;
+
+STBTT_DEF int stbtt_BakeFontBitmap(const unsigned char *data, int offset,  // font location (use offset=0 for plain .ttf)
+                                float pixel_height,                     // height of font in pixels
+                                unsigned char *pixels, int pw, int ph,  // bitmap to be filled in
+                                int first_char, int num_chars,          // characters to bake
+                                stbtt_bakedchar *chardata);             // you allocate this, it's num_chars long
+// if return is positive, the first unused row of the bitmap
+// if return is negative, returns the negative of the number of characters that fit
+// if return is 0, no characters fit and no rows were used
+// This uses a very crappy packing.
+
+typedef struct
+{
+   float x0,y0,s0,t0; // top-left
+   float x1,y1,s1,t1; // bottom-right
+} stbtt_aligned_quad;
+
+STBTT_DEF void stbtt_GetBakedQuad(const stbtt_bakedchar *chardata, int pw, int ph,  // same data as above
+                               int char_index,             // character to display
+                               float *xpos, float *ypos,   // pointers to current position in screen pixel space
+                               stbtt_aligned_quad *q,      // output: quad to draw
+                               int opengl_fillrule);       // true if opengl fill rule; false if DX9 or earlier
+// Call GetBakedQuad with char_index = 'character - first_char', and it
+// creates the quad you need to draw and advances the current position.
+//
+// The coordinate system used assumes y increases downwards.
+//
+// Characters will extend both above and below the current position;
+// see discussion of "BASELINE" above.
+//
+// It's inefficient; you might want to c&p it and optimize it.
+
+STBTT_DEF void stbtt_GetScaledFontVMetrics(const unsigned char *fontdata, int index, float size, float *ascent, float *descent, float *lineGap);
+// Query the font vertical metrics without having to create a font first.
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// NEW TEXTURE BAKING API
+//
+// This provides options for packing multiple fonts into one atlas, not
+// perfectly but better than nothing.
+
+typedef struct
+{
+   unsigned short x0,y0,x1,y1; // coordinates of bbox in bitmap
+   float xoff,yoff,xadvance;
+   float xoff2,yoff2;
+} stbtt_packedchar;
+
+typedef struct stbtt_pack_context stbtt_pack_context;
+typedef struct stbtt_fontinfo stbtt_fontinfo;
+#ifndef STB_RECT_PACK_VERSION
+typedef struct stbrp_rect stbrp_rect;
+#endif
+
+STBTT_DEF int  stbtt_PackBegin(stbtt_pack_context *spc, unsigned char *pixels, int width, int height, int stride_in_bytes, int padding, void *alloc_context);
+// Initializes a packing context stored in the passed-in stbtt_pack_context.
+// Future calls using this context will pack characters into the bitmap passed
+// in here: a 1-channel bitmap that is width * height. stride_in_bytes is
+// the distance from one row to the next (or 0 to mean they are packed tightly
+// together). "padding" is the amount of padding to leave between each
+// character (normally you want '1' for bitmaps you'll use as textures with
+// bilinear filtering).
+//
+// Returns 0 on failure, 1 on success.
+
+STBTT_DEF void stbtt_PackEnd  (stbtt_pack_context *spc);
+// Cleans up the packing context and frees all memory.
+
+#define STBTT_POINT_SIZE(x)   (-(x))
+
+STBTT_DEF int  stbtt_PackFontRange(stbtt_pack_context *spc, const unsigned char *fontdata, int font_index, float font_size,
+                                int first_unicode_char_in_range, int num_chars_in_range, stbtt_packedchar *chardata_for_range);
+// Creates character bitmaps from the font_index'th font found in fontdata (use
+// font_index=0 if you don't know what that is). It creates num_chars_in_range
+// bitmaps for characters with unicode values starting at first_unicode_char_in_range
+// and increasing. Data for how to render them is stored in chardata_for_range;
+// pass these to stbtt_GetPackedQuad to get back renderable quads.
+//
+// font_size is the full height of the character from ascender to descender,
+// as computed by stbtt_ScaleForPixelHeight. To use a point size as computed
+// by stbtt_ScaleForMappingEmToPixels, wrap the point size in STBTT_POINT_SIZE()
+// and pass that result as 'font_size':
+//       ...,                  20 , ... // font max minus min y is 20 pixels tall
+//       ..., STBTT_POINT_SIZE(20), ... // 'M' is 20 pixels tall
+
+typedef struct
+{
+   float font_size;
+   int first_unicode_codepoint_in_range;  // if non-zero, then the chars are continuous, and this is the first codepoint
+   int *array_of_unicode_codepoints;       // if non-zero, then this is an array of unicode codepoints
+   int num_chars;
+   stbtt_packedchar *chardata_for_range; // output
+   unsigned char h_oversample, v_oversample; // don't set these, they're used internally
+} stbtt_pack_range;
+
+STBTT_DEF int  stbtt_PackFontRanges(stbtt_pack_context *spc, const unsigned char *fontdata, int font_index, stbtt_pack_range *ranges, int num_ranges);
+// Creates character bitmaps from multiple ranges of characters stored in
+// ranges. This will usually create a better-packed bitmap than multiple
+// calls to stbtt_PackFontRange. Note that you can call this multiple
+// times within a single PackBegin/PackEnd.
+
+STBTT_DEF void stbtt_PackSetOversampling(stbtt_pack_context *spc, unsigned int h_oversample, unsigned int v_oversample);
+// Oversampling a font increases the quality by allowing higher-quality subpixel
+// positioning, and is especially valuable at smaller text sizes.
+//
+// This function sets the amount of oversampling for all following calls to
+// stbtt_PackFontRange(s) or stbtt_PackFontRangesGatherRects for a given
+// pack context. The default (no oversampling) is achieved by h_oversample=1
+// and v_oversample=1. The total number of pixels required is
+// h_oversample*v_oversample larger than the default; for example, 2x2
+// oversampling requires 4x the storage of 1x1. For best results, render
+// oversampled textures with bilinear filtering. Look at the readme in
+// stb/tests/oversample for information about oversampled fonts
+//
+// To use with PackFontRangesGather etc., you must set it before calls
+// call to PackFontRangesGatherRects.
+
+STBTT_DEF void stbtt_PackSetSkipMissingCodepoints(stbtt_pack_context *spc, int skip);
+// If skip != 0, this tells stb_truetype to skip any codepoints for which
+// there is no corresponding glyph. If skip=0, which is the default, then
+// codepoints without a glyph recived the font's "missing character" glyph,
+// typically an empty box by convention.
+
+STBTT_DEF void stbtt_GetPackedQuad(const stbtt_packedchar *chardata, int pw, int ph,  // same data as above
+                               int char_index,             // character to display
+                               float *xpos, float *ypos,   // pointers to current position in screen pixel space
+                               stbtt_aligned_quad *q,      // output: quad to draw
+                               int align_to_integer);
+
+STBTT_DEF int  stbtt_PackFontRangesGatherRects(stbtt_pack_context *spc, const stbtt_fontinfo *info, stbtt_pack_range *ranges, int num_ranges, stbrp_rect *rects);
+STBTT_DEF void stbtt_PackFontRangesPackRects(stbtt_pack_context *spc, stbrp_rect *rects, int num_rects);
+STBTT_DEF int  stbtt_PackFontRangesRenderIntoRects(stbtt_pack_context *spc, const stbtt_fontinfo *info, stbtt_pack_range *ranges, int num_ranges, stbrp_rect *rects);
+// Calling these functions in sequence is roughly equivalent to calling
+// stbtt_PackFontRanges(). If you more control over the packing of multiple
+// fonts, or if you want to pack custom data into a font texture, take a look
+// at the source to of stbtt_PackFontRanges() and create a custom version
+// using these functions, e.g. call GatherRects multiple times,
+// building up a single array of rects, then call PackRects once,
+// then call RenderIntoRects repeatedly. This may result in a
+// better packing than calling PackFontRanges multiple times
+// (or it may not).
+
+// this is an opaque structure that you shouldn't mess with which holds
+// all the context needed from PackBegin to PackEnd.
+struct stbtt_pack_context {
+   void *user_allocator_context;
+   void *pack_info;
+   int   width;
+   int   height;
+   int   stride_in_bytes;
+   int   padding;
+   int   skip_missing;
+   unsigned int   h_oversample, v_oversample;
+   unsigned char *pixels;
+   void  *nodes;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// FONT LOADING
+//
+//
+
+STBTT_DEF int stbtt_GetNumberOfFonts(const unsigned char *data);
+// This function will determine the number of fonts in a font file.  TrueType
+// collection (.ttc) files may contain multiple fonts, while TrueType font
+// (.ttf) files only contain one font. The number of fonts can be used for
+// indexing with the previous function where the index is between zero and one
+// less than the total fonts. If an error occurs, -1 is returned.
+
+STBTT_DEF int stbtt_GetFontOffsetForIndex(const unsigned char *data, int index);
+// Each .ttf/.ttc file may have more than one font. Each font has a sequential
+// index number starting from 0. Call this function to get the font offset for
+// a given index; it returns -1 if the index is out of range. A regular .ttf
+// file will only define one font and it always be at offset 0, so it will
+// return '0' for index 0, and -1 for all other indices.
+
+// The following structure is defined publicly so you can declare one on
+// the stack or as a global or etc, but you should treat it as opaque.
+struct stbtt_fontinfo
+{
+   void           * userdata;
+   unsigned char  * data;              // pointer to .ttf file
+   int              fontstart;         // offset of start of font
+
+   int numGlyphs;                     // number of glyphs, needed for range checking
+
+   int loca,head,glyf,hhea,hmtx,kern,gpos,svg; // table locations as offset from start of .ttf
+   int index_map;                     // a cmap mapping for our chosen character encoding
+   int indexToLocFormat;              // format needed to map from glyph index to glyph
+
+   stbtt__buf cff;                    // cff font data
+   stbtt__buf charstrings;            // the charstring index
+   stbtt__buf gsubrs;                 // global charstring subroutines index
+   stbtt__buf subrs;                  // private charstring subroutines index
+   stbtt__buf fontdicts;              // array of font dicts
+   stbtt__buf fdselect;               // map from glyph to fontdict
+};
+
+STBTT_DEF int stbtt_InitFont(stbtt_fontinfo *info, const unsigned char *data, int offset);
+// Given an offset into the file that defines a font, this function builds
+// the necessary cached info for the rest of the system. You must allocate
+// the stbtt_fontinfo yourself, and stbtt_InitFont will fill it out. You don't
+// need to do anything special to free it, because the contents are pure
+// value data with no additional data structures. Returns 0 on failure.
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// CHARACTER TO GLYPH-INDEX CONVERSIOn
+
+STBTT_DEF int stbtt_FindGlyphIndex(const stbtt_fontinfo *info, int unicode_codepoint);
+// If you're going to perform multiple operations on the same character
+// and you want a speed-up, call this function with the character you're
+// going to process, then use glyph-based functions instead of the
+// codepoint-based functions.
+// Returns 0 if the character codepoint is not defined in the font.
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// CHARACTER PROPERTIES
+//
+
+STBTT_DEF float stbtt_ScaleForPixelHeight(const stbtt_fontinfo *info, float pixels);
+// computes a scale factor to produce a font whose "height" is 'pixels' tall.
+// Height is measured as the distance from the highest ascender to the lowest
+// descender; in other words, it's equivalent to calling stbtt_GetFontVMetrics
+// and computing:
+//       scale = pixels / (ascent - descent)
+// so if you prefer to measure height by the ascent only, use a similar calculation.
+
+STBTT_DEF float stbtt_ScaleForMappingEmToPixels(const stbtt_fontinfo *info, float pixels);
+// computes a scale factor to produce a font whose EM size is mapped to
+// 'pixels' tall. This is probably what traditional APIs compute, but
+// I'm not positive.
+
+STBTT_DEF void stbtt_GetFontVMetrics(const stbtt_fontinfo *info, int *ascent, int *descent, int *lineGap);
+// ascent is the coordinate above the baseline the font extends; descent
+// is the coordinate below the baseline the font extends (i.e. it is typically negative)
+// lineGap is the spacing between one row's descent and the next row's ascent...
+// so you should advance the vertical position by "*ascent - *descent + *lineGap"
+//   these are expressed in unscaled coordinates, so you must multiply by
+//   the scale factor for a given size
+
+STBTT_DEF int  stbtt_GetFontVMetricsOS2(const stbtt_fontinfo *info, int *typoAscent, int *typoDescent, int *typoLineGap);
+// analogous to GetFontVMetrics, but returns the "typographic" values from the OS/2
+// table (specific to MS/Windows TTF files).
+//
+// Returns 1 on success (table present), 0 on failure.
+
+STBTT_DEF void stbtt_GetFontBoundingBox(const stbtt_fontinfo *info, int *x0, int *y0, int *x1, int *y1);
+// the bounding box around all possible characters
+
+STBTT_DEF void stbtt_GetCodepointHMetrics(const stbtt_fontinfo *info, int codepoint, int *advanceWidth, int *leftSideBearing);
+// leftSideBearing is the offset from the current horizontal position to the left edge of the character
+// advanceWidth is the offset from the current horizontal position to the next horizontal position
+//   these are expressed in unscaled coordinates
+
+STBTT_DEF int  stbtt_GetCodepointKernAdvance(const stbtt_fontinfo *info, int ch1, int ch2);
+// an additional amount to add to the 'advance' value between ch1 and ch2
+
+STBTT_DEF int stbtt_GetCodepointBox(const stbtt_fontinfo *info, int codepoint, int *x0, int *y0, int *x1, int *y1);
+// Gets the bounding box of the visible part of the glyph, in unscaled coordinates
+
+STBTT_DEF void stbtt_GetGlyphHMetrics(const stbtt_fontinfo *info, int glyph_index, int *advanceWidth, int *leftSideBearing);
+STBTT_DEF int  stbtt_GetGlyphKernAdvance(const stbtt_fontinfo *info, int glyph1, int glyph2);
+STBTT_DEF int  stbtt_GetGlyphBox(const stbtt_fontinfo *info, int glyph_index, int *x0, int *y0, int *x1, int *y1);
+// as above, but takes one or more glyph indices for greater efficiency
+
+typedef struct stbtt_kerningentry
+{
+   int glyph1; // use stbtt_FindGlyphIndex
+   int glyph2;
+   int advance;
+} stbtt_kerningentry;
+
+STBTT_DEF int  stbtt_GetKerningTableLength(const stbtt_fontinfo *info);
+STBTT_DEF int  stbtt_GetKerningTable(const stbtt_fontinfo *info, stbtt_kerningentry* table, int table_length);
+// Retrieves a complete list of all of the kerning pairs provided by the font
+// stbtt_GetKerningTable never writes more than table_length entries and returns how many entries it did write.
+// The table will be sorted by (a.glyph1 == b.glyph1)?(a.glyph2 < b.glyph2):(a.glyph1 < b.glyph1)
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// GLYPH SHAPES (you probably don't need these, but they have to go before
+// the bitmaps for C declaration-order reasons)
+//
+
+#ifndef STBTT_vmove // you can predefine these to use different values (but why?)
+   enum {
+      STBTT_vmove=1,
+      STBTT_vline,
+      STBTT_vcurve,
+      STBTT_vcubic
+   };
+#endif
+
+#ifndef stbtt_vertex // you can predefine this to use different values
+                   // (we share this with other code at RAD)
+   #define stbtt_vertex_type short // can't use stbtt_int16 because that's not visible in the header file
+   typedef struct
+   {
+      stbtt_vertex_type x,y,cx,cy,cx1,cy1;
+      unsigned char type,padding;
+   } stbtt_vertex;
+#endif
+
+STBTT_DEF int stbtt_IsGlyphEmpty(const stbtt_fontinfo *info, int glyph_index);
+// returns non-zero if nothing is drawn for this glyph
+
+STBTT_DEF int stbtt_GetCodepointShape(const stbtt_fontinfo *info, int unicode_codepoint, stbtt_vertex **vertices);
+STBTT_DEF int stbtt_GetGlyphShape(const stbtt_fontinfo *info, int glyph_index, stbtt_vertex **vertices);
+// returns # of vertices and fills *vertices with the pointer to them
+//   these are expressed in "unscaled" coordinates
+//
+// The shape is a series of contours. Each one starts with
+// a STBTT_moveto, then consists of a series of mixed
+// STBTT_lineto and STBTT_curveto segments. A lineto
+// draws a line from previous endpoint to its x,y; a curveto
+// draws a quadratic bezier from previous endpoint to
+// its x,y, using cx,cy as the bezier control point.
+
+STBTT_DEF void stbtt_FreeShape(const stbtt_fontinfo *info, stbtt_vertex *vertices);
+// frees the data allocated above
+
+STBTT_DEF unsigned char *stbtt_FindSVGDoc(const stbtt_fontinfo *info, int gl);
+STBTT_DEF int stbtt_GetCodepointSVG(const stbtt_fontinfo *info, int unicode_codepoint, const char **svg);
+STBTT_DEF int stbtt_GetGlyphSVG(const stbtt_fontinfo *info, int gl, const char **svg);
+// fills svg with the character's SVG data.
+// returns data size or 0 if SVG not found.
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// BITMAP RENDERING
+//
+
+STBTT_DEF void stbtt_FreeBitmap(unsigned char *bitmap, void *userdata);
+// frees the bitmap allocated below
+
+STBTT_DEF unsigned char *stbtt_GetCodepointBitmap(const stbtt_fontinfo *info, float scale_x, float scale_y, int codepoint, int *width, int *height, int *xoff, int *yoff);
+// allocates a large-enough single-channel 8bpp bitmap and renders the
+// specified character/glyph at the specified scale into it, with
+// antialiasing. 0 is no coverage (transparent), 255 is fully covered (opaque).
+// *width & *height are filled out with the width & height of the bitmap,
+// which is stored left-to-right, top-to-bottom.
+//
+// xoff/yoff are the offset it pixel space from the glyph origin to the top-left of the bitmap
+
+STBTT_DEF unsigned char *stbtt_GetCodepointBitmapSubpixel(const stbtt_fontinfo *info, float scale_x, float scale_y, float shift_x, float shift_y, int codepoint, int *width, int *height, int *xoff, int *yoff);
+// the same as stbtt_GetCodepoitnBitmap, but you can specify a subpixel
+// shift for the character
+
+STBTT_DEF void stbtt_MakeCodepointBitmap(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, int codepoint);
+// the same as stbtt_GetCodepointBitmap, but you pass in storage for the bitmap
+// in the form of 'output', with row spacing of 'out_stride' bytes. the bitmap
+// is clipped to out_w/out_h bytes. Call stbtt_GetCodepointBitmapBox to get the
+// width and height and positioning info for it first.
+
+STBTT_DEF void stbtt_MakeCodepointBitmapSubpixel(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, float shift_x, float shift_y, int codepoint);
+// same as stbtt_MakeCodepointBitmap, but you can specify a subpixel
+// shift for the character
+
+STBTT_DEF void stbtt_MakeCodepointBitmapSubpixelPrefilter(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, float shift_x, float shift_y, int oversample_x, int oversample_y, float *sub_x, float *sub_y, int codepoint);
+// same as stbtt_MakeCodepointBitmapSubpixel, but prefiltering
+// is performed (see stbtt_PackSetOversampling)
+
+STBTT_DEF void stbtt_GetCodepointBitmapBox(const stbtt_fontinfo *font, int codepoint, float scale_x, float scale_y, int *ix0, int *iy0, int *ix1, int *iy1);
+// get the bbox of the bitmap centered around the glyph origin; so the
+// bitmap width is ix1-ix0, height is iy1-iy0, and location to place
+// the bitmap top left is (leftSideBearing*scale,iy0).
+// (Note that the bitmap uses y-increases-down, but the shape uses
+// y-increases-up, so CodepointBitmapBox and CodepointBox are inverted.)
+
+STBTT_DEF void stbtt_GetCodepointBitmapBoxSubpixel(const stbtt_fontinfo *font, int codepoint, float scale_x, float scale_y, float shift_x, float shift_y, int *ix0, int *iy0, int *ix1, int *iy1);
+// same as stbtt_GetCodepointBitmapBox, but you can specify a subpixel
+// shift for the character
+
+// the following functions are equivalent to the above functions, but operate
+// on glyph indices instead of Unicode codepoints (for efficiency)
+STBTT_DEF unsigned char *stbtt_GetGlyphBitmap(const stbtt_fontinfo *info, float scale_x, float scale_y, int glyph, int *width, int *height, int *xoff, int *yoff);
+STBTT_DEF unsigned char *stbtt_GetGlyphBitmapSubpixel(const stbtt_fontinfo *info, float scale_x, float scale_y, float shift_x, float shift_y, int glyph, int *width, int *height, int *xoff, int *yoff);
+STBTT_DEF void stbtt_MakeGlyphBitmap(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, int glyph);
+STBTT_DEF void stbtt_MakeGlyphBitmapSubpixel(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, float shift_x, float shift_y, int glyph);
+STBTT_DEF void stbtt_MakeGlyphBitmapSubpixelPrefilter(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, float shift_x, float shift_y, int oversample_x, int oversample_y, float *sub_x, float *sub_y, int glyph);
+STBTT_DEF void stbtt_GetGlyphBitmapBox(const stbtt_fontinfo *font, int glyph, float scale_x, float scale_y, int *ix0, int *iy0, int *ix1, int *iy1);
+STBTT_DEF void stbtt_GetGlyphBitmapBoxSubpixel(const stbtt_fontinfo *font, int glyph, float scale_x, float scale_y,float shift_x, float shift_y, int *ix0, int *iy0, int *ix1, int *iy1);
+
+
+// @TODO: don't expose this structure
+typedef struct
+{
+   int w,h,stride;
+   unsigned char *pixels;
+} stbtt__bitmap;
+
+// rasterize a shape with quadratic beziers into a bitmap
+STBTT_DEF void stbtt_Rasterize(stbtt__bitmap *result,        // 1-channel bitmap to draw into
+                               float flatness_in_pixels,     // allowable error of curve in pixels
+                               stbtt_vertex *vertices,       // array of vertices defining shape
+                               int num_verts,                // number of vertices in above array
+                               float scale_x, float scale_y, // scale applied to input vertices
+                               float shift_x, float shift_y, // translation applied to input vertices
+                               int x_off, int y_off,         // another translation applied to input
+                               int invert,                   // if non-zero, vertically flip shape
+                               void *userdata);              // context for to STBTT_MALLOC
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Signed Distance Function (or Field) rendering
+
+STBTT_DEF void stbtt_FreeSDF(unsigned char *bitmap, void *userdata);
+// frees the SDF bitmap allocated below
+
+STBTT_DEF unsigned char * stbtt_GetGlyphSDF(const stbtt_fontinfo *info, float scale, int glyph, int padding, unsigned char onedge_value, float pixel_dist_scale, int *width, int *height, int *xoff, int *yoff);
+STBTT_DEF unsigned char * stbtt_GetCodepointSDF(const stbtt_fontinfo *info, float scale, int codepoint, int padding, unsigned char onedge_value, float pixel_dist_scale, int *width, int *height, int *xoff, int *yoff);
+// These functions compute a discretized SDF field for a single character, suitable for storing
+// in a single-channel texture, sampling with bilinear filtering, and testing against
+// larger than some threshold to produce scalable fonts.
+//        info              --  the font
+//        scale             --  controls the size of the resulting SDF bitmap, same as it would be creating a regular bitmap
+//        glyph/codepoint   --  the character to generate the SDF for
+//        padding           --  extra "pixels" around the character which are filled with the distance to the character (not 0),
+//                                 which allows effects like bit outlines
+//        onedge_value      --  value 0-255 to test the SDF against to reconstruct the character (i.e. the isocontour of the character)
+//        pixel_dist_scale  --  what value the SDF should increase by when moving one SDF "pixel" away from the edge (on the 0..255 scale)
+//                                 if positive, > onedge_value is inside; if negative, < onedge_value is inside
+//        width,height      --  output height & width of the SDF bitmap (including padding)
+//        xoff,yoff         --  output origin of the character
+//        return value      --  a 2D array of bytes 0..255, width*height in size
+//
+// pixel_dist_scale & onedge_value are a scale & bias that allows you to make
+// optimal use of the limited 0..255 for your application, trading off precision
+// and special effects. SDF values outside the range 0..255 are clamped to 0..255.
+//
+// Example:
+//      scale = stbtt_ScaleForPixelHeight(22)
+//      padding = 5
+//      onedge_value = 180
+//      pixel_dist_scale = 180/5.0 = 36.0
+//
+//      This will create an SDF bitmap in which the character is about 22 pixels
+//      high but the whole bitmap is about 22+5+5=32 pixels high. To produce a filled
+//      shape, sample the SDF at each pixel and fill the pixel if the SDF value
+//      is greater than or equal to 180/255. (You'll actually want to antialias,
+//      which is beyond the scope of this example.) Additionally, you can compute
+//      offset outlines (e.g. to stroke the character border inside & outside,
+//      or only outside). For example, to fill outside the character up to 3 SDF
+//      pixels, you would compare against (180-36.0*3)/255 = 72/255. The above
+//      choice of variables maps a range from 5 pixels outside the shape to
+//      2 pixels inside the shape to 0..255; this is intended primarily for apply
+//      outside effects only (the interior range is needed to allow proper
+//      antialiasing of the font at *smaller* sizes)
+//
+// The function computes the SDF analytically at each SDF pixel, not by e.g.
+// building a higher-res bitmap and approximating it. In theory the quality
+// should be as high as possible for an SDF of this size & representation, but
+// unclear if this is true in practice (perhaps building a higher-res bitmap
+// and computing from that can allow drop-out prevention).
+//
+// The algorithm has not been optimized at all, so expect it to be slow
+// if computing lots of characters or very large sizes.
+
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Finding the right font...
+//
+// You should really just solve this offline, keep your own tables
+// of what font is what, and don't try to get it out of the .ttf file.
+// That's because getting it out of the .ttf file is really hard, because
+// the names in the file can appear in many possible encodings, in many
+// possible languages, and e.g. if you need a case-insensitive comparison,
+// the details of that depend on the encoding & language in a complex way
+// (actually underspecified in truetype, but also gigantic).
+//
+// But you can use the provided functions in two possible ways:
+//     stbtt_FindMatchingFont() will use *case-sensitive* comparisons on
+//             unicode-encoded names to try to find the font you want;
+//             you can run this before calling stbtt_InitFont()
+//
+//     stbtt_GetFontNameString() lets you get any of the various strings
+//             from the file yourself and do your own comparisons on them.
+//             You have to have called stbtt_InitFont() first.
+
+
+STBTT_DEF int stbtt_FindMatchingFont(const unsigned char *fontdata, const char *name, int flags);
+// returns the offset (not index) of the font that matches, or -1 if none
+//   if you use STBTT_MACSTYLE_DONTCARE, use a font name like "Arial Bold".
+//   if you use any other flag, use a font name like "Arial"; this checks
+//     the 'macStyle' header field; i don't know if fonts set this consistently
+#define STBTT_MACSTYLE_DONTCARE     0
+#define STBTT_MACSTYLE_BOLD         1
+#define STBTT_MACSTYLE_ITALIC       2
+#define STBTT_MACSTYLE_UNDERSCORE   4
+#define STBTT_MACSTYLE_NONE         8   // <= not same as 0, this makes us check the bitfield is 0
+
+STBTT_DEF int stbtt_CompareUTF8toUTF16_bigendian(const char *s1, int len1, const char *s2, int len2);
+// returns 1/0 whether the first string interpreted as utf8 is identical to
+// the second string interpreted as big-endian utf16... useful for strings from next func
+
+STBTT_DEF const char *stbtt_GetFontNameString(const stbtt_fontinfo *font, int *length, int platformID, int encodingID, int languageID, int nameID);
+// returns the string (which may be big-endian double byte, e.g. for unicode)
+// and puts the length in bytes in *length.
+//
+// some of the values for the IDs are below; for more see the truetype spec:
+//     http://developer.apple.com/textfonts/TTRefMan/RM06/Chap6name.html
+//     http://www.microsoft.com/typography/otspec/name.htm
+
+enum { // platformID
+   STBTT_PLATFORM_ID_UNICODE   =0,
+   STBTT_PLATFORM_ID_MAC       =1,
+   STBTT_PLATFORM_ID_ISO       =2,
+   STBTT_PLATFORM_ID_MICROSOFT =3
+};
+
+enum { // encodingID for STBTT_PLATFORM_ID_UNICODE
+   STBTT_UNICODE_EID_UNICODE_1_0    =0,
+   STBTT_UNICODE_EID_UNICODE_1_1    =1,
+   STBTT_UNICODE_EID_ISO_10646      =2,
+   STBTT_UNICODE_EID_UNICODE_2_0_BMP=3,
+   STBTT_UNICODE_EID_UNICODE_2_0_FULL=4
+};
+
+enum { // encodingID for STBTT_PLATFORM_ID_MICROSOFT
+   STBTT_MS_EID_SYMBOL        =0,
+   STBTT_MS_EID_UNICODE_BMP   =1,
+   STBTT_MS_EID_SHIFTJIS      =2,
+   STBTT_MS_EID_UNICODE_FULL  =10
+};
+
+enum { // encodingID for STBTT_PLATFORM_ID_MAC; same as Script Manager codes
+   STBTT_MAC_EID_ROMAN        =0,   STBTT_MAC_EID_ARABIC       =4,
+   STBTT_MAC_EID_JAPANESE     =1,   STBTT_MAC_EID_HEBREW       =5,
+   STBTT_MAC_EID_CHINESE_TRAD =2,   STBTT_MAC_EID_GREEK        =6,
+   STBTT_MAC_EID_KOREAN       =3,   STBTT_MAC_EID_RUSSIAN      =7
+};
+
+enum { // languageID for STBTT_PLATFORM_ID_MICROSOFT; same as LCID...
+       // problematic because there are e.g. 16 english LCIDs and 16 arabic LCIDs
+   STBTT_MS_LANG_ENGLISH     =0x0409,   STBTT_MS_LANG_ITALIAN     =0x0410,
+   STBTT_MS_LANG_CHINESE     =0x0804,   STBTT_MS_LANG_JAPANESE    =0x0411,
+   STBTT_MS_LANG_DUTCH       =0x0413,   STBTT_MS_LANG_KOREAN      =0x0412,
+   STBTT_MS_LANG_FRENCH      =0x040c,   STBTT_MS_LANG_RUSSIAN     =0x0419,
+   STBTT_MS_LANG_GERMAN      =0x0407,   STBTT_MS_LANG_SPANISH     =0x0409,
+   STBTT_MS_LANG_HEBREW      =0x040d,   STBTT_MS_LANG_SWEDISH     =0x041D
+};
+
+enum { // languageID for STBTT_PLATFORM_ID_MAC
+   STBTT_MAC_LANG_ENGLISH      =0 ,   STBTT_MAC_LANG_JAPANESE     =11,
+   STBTT_MAC_LANG_ARABIC       =12,   STBTT_MAC_LANG_KOREAN       =23,
+   STBTT_MAC_LANG_DUTCH        =4 ,   STBTT_MAC_LANG_RUSSIAN      =32,
+   STBTT_MAC_LANG_FRENCH       =1 ,   STBTT_MAC_LANG_SPANISH      =6 ,
+   STBTT_MAC_LANG_GERMAN       =2 ,   STBTT_MAC_LANG_SWEDISH      =5 ,
+   STBTT_MAC_LANG_HEBREW       =10,   STBTT_MAC_LANG_CHINESE_SIMPLIFIED =33,
+   STBTT_MAC_LANG_ITALIAN      =3 ,   STBTT_MAC_LANG_CHINESE_TRAD =19
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __STB_INCLUDE_STB_TRUETYPE_H__
+
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+////
+////   IMPLEMENTATION
+////
+////
+
+#ifdef STB_TRUETYPE_IMPLEMENTATION
+
+#ifndef STBTT_MAX_OVERSAMPLE
+#define STBTT_MAX_OVERSAMPLE   8
+#endif
+
+#if STBTT_MAX_OVERSAMPLE > 255
+#error "STBTT_MAX_OVERSAMPLE cannot be > 255"
+#endif
+
+typedef int stbtt__test_oversample_pow2[(STBTT_MAX_OVERSAMPLE & (STBTT_MAX_OVERSAMPLE-1)) == 0 ? 1 : -1];
+
+#ifndef STBTT_RASTERIZER_VERSION
+#define STBTT_RASTERIZER_VERSION 2
+#endif
+
+#ifdef _MSC_VER
+#define STBTT__NOTUSED(v)  (void)(v)
+#else
+#define STBTT__NOTUSED(v)  (void)sizeof(v)
+#endif
+
+//////////////////////////////////////////////////////////////////////////
+//
+// stbtt__buf helpers to parse data from file
+//
+
+static stbtt_uint8 stbtt__buf_get8(stbtt__buf *b)
+{
+   if (b->cursor >= b->size)
+      return 0;
+   return b->data[b->cursor++];
+}
+
+static stbtt_uint8 stbtt__buf_peek8(stbtt__buf *b)
+{
+   if (b->cursor >= b->size)
+      return 0;
+   return b->data[b->cursor];
+}
+
+static void stbtt__buf_seek(stbtt__buf *b, int o)
+{
+   STBTT_assert(!(o > b->size || o < 0));
+   b->cursor = (o > b->size || o < 0) ? b->size : o;
+}
+
+static void stbtt__buf_skip(stbtt__buf *b, int o)
+{
+   stbtt__buf_seek(b, b->cursor + o);
+}
+
+static stbtt_uint32 stbtt__buf_get(stbtt__buf *b, int n)
+{
+   stbtt_uint32 v = 0;
+   int i;
+   STBTT_assert(n >= 1 && n <= 4);
+   for (i = 0; i < n; i++)
+      v = (v << 8) | stbtt__buf_get8(b);
+   return v;
+}
+
+static stbtt__buf stbtt__new_buf(const void *p, size_t size)
+{
+   stbtt__buf r;
+   STBTT_assert(size < 0x40000000);
+   r.data = (stbtt_uint8*) p;
+   r.size = (int) size;
+   r.cursor = 0;
+   return r;
+}
+
+#define stbtt__buf_get16(b)  stbtt__buf_get((b), 2)
+#define stbtt__buf_get32(b)  stbtt__buf_get((b), 4)
+
+static stbtt__buf stbtt__buf_range(const stbtt__buf *b, int o, int s)
+{
+   stbtt__buf r = stbtt__new_buf(NULL, 0);
+   if (o < 0 || s < 0 || o > b->size || s > b->size - o) return r;
+   r.data = b->data + o;
+   r.size = s;
+   return r;
+}
+
+static stbtt__buf stbtt__cff_get_index(stbtt__buf *b)
+{
+   int count, start, offsize;
+   start = b->cursor;
+   count = stbtt__buf_get16(b);
+   if (count) {
+      offsize = stbtt__buf_get8(b);
+      STBTT_assert(offsize >= 1 && offsize <= 4);
+      stbtt__buf_skip(b, offsize * count);
+      stbtt__buf_skip(b, stbtt__buf_get(b, offsize) - 1);
+   }
+   return stbtt__buf_range(b, start, b->cursor - start);
+}
+
+static stbtt_uint32 stbtt__cff_int(stbtt__buf *b)
+{
+   int b0 = stbtt__buf_get8(b);
+   if (b0 >= 32 && b0 <= 246)       return b0 - 139;
+   else if (b0 >= 247 && b0 <= 250) return (b0 - 247)*256 + stbtt__buf_get8(b) + 108;
+   else if (b0 >= 251 && b0 <= 254) return -(b0 - 251)*256 - stbtt__buf_get8(b) - 108;
+   else if (b0 == 28)               return stbtt__buf_get16(b);
+   else if (b0 == 29)               return stbtt__buf_get32(b);
+   STBTT_assert(0);
+   return 0;
+}
+
+static void stbtt__cff_skip_operand(stbtt__buf *b) {
+   int v, b0 = stbtt__buf_peek8(b);
+   STBTT_assert(b0 >= 28);
+   if (b0 == 30) {
+      stbtt__buf_skip(b, 1);
+      while (b->cursor < b->size) {
+         v = stbtt__buf_get8(b);
+         if ((v & 0xF) == 0xF || (v >> 4) == 0xF)
+            break;
+      }
+   } else {
+      stbtt__cff_int(b);
+   }
+}
+
+static stbtt__buf stbtt__dict_get(stbtt__buf *b, int key)
+{
+   stbtt__buf_seek(b, 0);
+   while (b->cursor < b->size) {
+      int start = b->cursor, end, op;
+      while (stbtt__buf_peek8(b) >= 28)
+         stbtt__cff_skip_operand(b);
+      end = b->cursor;
+      op = stbtt__buf_get8(b);
+      if (op == 12)  op = stbtt__buf_get8(b) | 0x100;
+      if (op == key) return stbtt__buf_range(b, start, end-start);
+   }
+   return stbtt__buf_range(b, 0, 0);
+}
+
+static void stbtt__dict_get_ints(stbtt__buf *b, int key, int outcount, stbtt_uint32 *out)
+{
+   int i;
+   stbtt__buf operands = stbtt__dict_get(b, key);
+   for (i = 0; i < outcount && operands.cursor < operands.size; i++)
+      out[i] = stbtt__cff_int(&operands);
+}
+
+static int stbtt__cff_index_count(stbtt__buf *b)
+{
+   stbtt__buf_seek(b, 0);
+   return stbtt__buf_get16(b);
+}
+
+static stbtt__buf stbtt__cff_index_get(stbtt__buf b, int i)
+{
+   int count, offsize, start, end;
+   stbtt__buf_seek(&b, 0);
+   count = stbtt__buf_get16(&b);
+   offsize = stbtt__buf_get8(&b);
+   STBTT_assert(i >= 0 && i < count);
+   STBTT_assert(offsize >= 1 && offsize <= 4);
+   stbtt__buf_skip(&b, i*offsize);
+   start = stbtt__buf_get(&b, offsize);
+   end = stbtt__buf_get(&b, offsize);
+   return stbtt__buf_range(&b, 2+(count+1)*offsize+start, end - start);
+}
+
+//////////////////////////////////////////////////////////////////////////
+//
+// accessors to parse data from file
+//
+
+// on platforms that don't allow misaligned reads, if we want to allow
+// truetype fonts that aren't padded to alignment, define ALLOW_UNALIGNED_TRUETYPE
+
+#define ttBYTE(p)     (* (stbtt_uint8 *) (p))
+#define ttCHAR(p)     (* (stbtt_int8 *) (p))
+#define ttFixed(p)    ttLONG(p)
+
+static stbtt_uint16 ttUSHORT(stbtt_uint8 *p) { return p[0]*256 + p[1]; }
+static stbtt_int16 ttSHORT(stbtt_uint8 *p)   { return p[0]*256 + p[1]; }
+static stbtt_uint32 ttULONG(stbtt_uint8 *p)  { return (p[0]<<24) + (p[1]<<16) + (p[2]<<8) + p[3]; }
+static stbtt_int32 ttLONG(stbtt_uint8 *p)    { return (p[0]<<24) + (p[1]<<16) + (p[2]<<8) + p[3]; }
+
+#define stbtt_tag4(p,c0,c1,c2,c3) ((p)[0] == (c0) && (p)[1] == (c1) && (p)[2] == (c2) && (p)[3] == (c3))
+#define stbtt_tag(p,str)           stbtt_tag4(p,str[0],str[1],str[2],str[3])
+
+static int stbtt__isfont(stbtt_uint8 *font)
+{
+   // check the version number
+   if (stbtt_tag4(font, '1',0,0,0))  return 1; // TrueType 1
+   if (stbtt_tag(font, "typ1"))   return 1; // TrueType with type 1 font -- we don't support this!
+   if (stbtt_tag(font, "OTTO"))   return 1; // OpenType with CFF
+   if (stbtt_tag4(font, 0,1,0,0)) return 1; // OpenType 1.0
+   if (stbtt_tag(font, "true"))   return 1; // Apple specification for TrueType fonts
+   return 0;
+}
+
+// @OPTIMIZE: binary search
+static stbtt_uint32 stbtt__find_table(stbtt_uint8 *data, stbtt_uint32 fontstart, const char *tag)
+{
+   stbtt_int32 num_tables = ttUSHORT(data+fontstart+4);
+   stbtt_uint32 tabledir = fontstart + 12;
+   stbtt_int32 i;
+   for (i=0; i < num_tables; ++i) {
+      stbtt_uint32 loc = tabledir + 16*i;
+      if (stbtt_tag(data+loc+0, tag))
+         return ttULONG(data+loc+8);
+   }
+   return 0;
+}
+
+static int stbtt_GetFontOffsetForIndex_internal(unsigned char *font_collection, int index)
+{
+   // if it's just a font, there's only one valid index
+   if (stbtt__isfont(font_collection))
+      return index == 0 ? 0 : -1;
+
+   // check if it's a TTC
+   if (stbtt_tag(font_collection, "ttcf")) {
+      // version 1?
+      if (ttULONG(font_collection+4) == 0x00010000 || ttULONG(font_collection+4) == 0x00020000) {
+         stbtt_int32 n = ttLONG(font_collection+8);
+         if (index >= n)
+            return -1;
+         return ttULONG(font_collection+12+index*4);
+      }
+   }
+   return -1;
+}
+
+static int stbtt_GetNumberOfFonts_internal(unsigned char *font_collection)
+{
+   // if it's just a font, there's only one valid font
+   if (stbtt__isfont(font_collection))
+      return 1;
+
+   // check if it's a TTC
+   if (stbtt_tag(font_collection, "ttcf")) {
+      // version 1?
+      if (ttULONG(font_collection+4) == 0x00010000 || ttULONG(font_collection+4) == 0x00020000) {
+         return ttLONG(font_collection+8);
+      }
+   }
+   return 0;
+}
+
+static stbtt__buf stbtt__get_subrs(stbtt__buf cff, stbtt__buf fontdict)
+{
+   stbtt_uint32 subrsoff = 0, private_loc[2] = { 0, 0 };
+   stbtt__buf pdict;
+   stbtt__dict_get_ints(&fontdict, 18, 2, private_loc);
+   if (!private_loc[1] || !private_loc[0]) return stbtt__new_buf(NULL, 0);
+   pdict = stbtt__buf_range(&cff, private_loc[1], private_loc[0]);
+   stbtt__dict_get_ints(&pdict, 19, 1, &subrsoff);
+   if (!subrsoff) return stbtt__new_buf(NULL, 0);
+   stbtt__buf_seek(&cff, private_loc[1]+subrsoff);
+   return stbtt__cff_get_index(&cff);
+}
+
+// since most people won't use this, find this table the first time it's needed
+static int stbtt__get_svg(stbtt_fontinfo *info)
+{
+   stbtt_uint32 t;
+   if (info->svg < 0) {
+      t = stbtt__find_table(info->data, info->fontstart, "SVG ");
+      if (t) {
+         stbtt_uint32 offset = ttULONG(info->data + t + 2);
+         info->svg = t + offset;
+      } else {
+         info->svg = 0;
+      }
+   }
+   return info->svg;
+}
+
+static int stbtt_InitFont_internal(stbtt_fontinfo *info, unsigned char *data, int fontstart)
+{
+   stbtt_uint32 cmap, t;
+   stbtt_int32 i,numTables;
+
+   info->data = data;
+   info->fontstart = fontstart;
+   info->cff = stbtt__new_buf(NULL, 0);
+
+   cmap = stbtt__find_table(data, fontstart, "cmap");       // required
+   info->loca = stbtt__find_table(data, fontstart, "loca"); // required
+   info->head = stbtt__find_table(data, fontstart, "head"); // required
+   info->glyf = stbtt__find_table(data, fontstart, "glyf"); // required
+   info->hhea = stbtt__find_table(data, fontstart, "hhea"); // required
+   info->hmtx = stbtt__find_table(data, fontstart, "hmtx"); // required
+   info->kern = stbtt__find_table(data, fontstart, "kern"); // not required
+   info->gpos = stbtt__find_table(data, fontstart, "GPOS"); // not required
+
+   if (!cmap || !info->head || !info->hhea || !info->hmtx)
+      return 0;
+   if (info->glyf) {
+      // required for truetype
+      if (!info->loca) return 0;
+   } else {
+      // initialization for CFF / Type2 fonts (OTF)
+      stbtt__buf b, topdict, topdictidx;
+      stbtt_uint32 cstype = 2, charstrings = 0, fdarrayoff = 0, fdselectoff = 0;
+      stbtt_uint32 cff;
+
+      cff = stbtt__find_table(data, fontstart, "CFF ");
+      if (!cff) return 0;
+
+      info->fontdicts = stbtt__new_buf(NULL, 0);
+      info->fdselect = stbtt__new_buf(NULL, 0);
+
+      // @TODO this should use size from table (not 512MB)
+      info->cff = stbtt__new_buf(data+cff, 512*1024*1024);
+      b = info->cff;
+
+      // read the header
+      stbtt__buf_skip(&b, 2);
+      stbtt__buf_seek(&b, stbtt__buf_get8(&b)); // hdrsize
+
+      // @TODO the name INDEX could list multiple fonts,
+      // but we just use the first one.
+      stbtt__cff_get_index(&b);  // name INDEX
+      topdictidx = stbtt__cff_get_index(&b);
+      topdict = stbtt__cff_index_get(topdictidx, 0);
+      stbtt__cff_get_index(&b);  // string INDEX
+      info->gsubrs = stbtt__cff_get_index(&b);
+
+      stbtt__dict_get_ints(&topdict, 17, 1, &charstrings);
+      stbtt__dict_get_ints(&topdict, 0x100 | 6, 1, &cstype);
+      stbtt__dict_get_ints(&topdict, 0x100 | 36, 1, &fdarrayoff);
+      stbtt__dict_get_ints(&topdict, 0x100 | 37, 1, &fdselectoff);
+      info->subrs = stbtt__get_subrs(b, topdict);
+
+      // we only support Type 2 charstrings
+      if (cstype != 2) return 0;
+      if (charstrings == 0) return 0;
+
+      if (fdarrayoff) {
+         // looks like a CID font
+         if (!fdselectoff) return 0;
+         stbtt__buf_seek(&b, fdarrayoff);
+         info->fontdicts = stbtt__cff_get_index(&b);
+         info->fdselect = stbtt__buf_range(&b, fdselectoff, b.size-fdselectoff);
+      }
+
+      stbtt__buf_seek(&b, charstrings);
+      info->charstrings = stbtt__cff_get_index(&b);
+   }
+
+   t = stbtt__find_table(data, fontstart, "maxp");
+   if (t)
+      info->numGlyphs = ttUSHORT(data+t+4);
+   else
+      info->numGlyphs = 0xffff;
+
+   info->svg = -1;
+
+   // find a cmap encoding table we understand *now* to avoid searching
+   // later. (todo: could make this installable)
+   // the same regardless of glyph.
+   numTables = ttUSHORT(data + cmap + 2);
+   info->index_map = 0;
+   for (i=0; i < numTables; ++i) {
+      stbtt_uint32 encoding_record = cmap + 4 + 8 * i;
+      // find an encoding we understand:
+      switch(ttUSHORT(data+encoding_record)) {
+         case STBTT_PLATFORM_ID_MICROSOFT:
+            switch (ttUSHORT(data+encoding_record+2)) {
+               case STBTT_MS_EID_UNICODE_BMP:
+               case STBTT_MS_EID_UNICODE_FULL:
+                  // MS/Unicode
+                  info->index_map = cmap + ttULONG(data+encoding_record+4);
+                  break;
+            }
+            break;
+        case STBTT_PLATFORM_ID_UNICODE:
+            // Mac/iOS has these
+            // all the encodingIDs are unicode, so we don't bother to check it
+            info->index_map = cmap + ttULONG(data+encoding_record+4);
+            break;
+      }
+   }
+   if (info->index_map == 0)
+      return 0;
+
+   info->indexToLocFormat = ttUSHORT(data+info->head + 50);
+   return 1;
+}
+
+STBTT_DEF int stbtt_FindGlyphIndex(const stbtt_fontinfo *info, int unicode_codepoint)
+{
+   stbtt_uint8 *data = info->data;
+   stbtt_uint32 index_map = info->index_map;
+
+   stbtt_uint16 format = ttUSHORT(data + index_map + 0);
+   if (format == 0) { // apple byte encoding
+      stbtt_int32 bytes = ttUSHORT(data + index_map + 2);
+      if (unicode_codepoint < bytes-6)
+         return ttBYTE(data + index_map + 6 + unicode_codepoint);
+      return 0;
+   } else if (format == 6) {
+      stbtt_uint32 first = ttUSHORT(data + index_map + 6);
+      stbtt_uint32 count = ttUSHORT(data + index_map + 8);
+      if ((stbtt_uint32) unicode_codepoint >= first && (stbtt_uint32) unicode_codepoint < first+count)
+         return ttUSHORT(data + index_map + 10 + (unicode_codepoint - first)*2);
+      return 0;
+   } else if (format == 2) {
+      STBTT_assert(0); // @TODO: high-byte mapping for japanese/chinese/korean
+      return 0;
+   } else if (format == 4) { // standard mapping for windows fonts: binary search collection of ranges
+      stbtt_uint16 segcount = ttUSHORT(data+index_map+6) >> 1;
+      stbtt_uint16 searchRange = ttUSHORT(data+index_map+8) >> 1;
+      stbtt_uint16 entrySelector = ttUSHORT(data+index_map+10);
+      stbtt_uint16 rangeShift = ttUSHORT(data+index_map+12) >> 1;
+
+      // do a binary search of the segments
+      stbtt_uint32 endCount = index_map + 14;
+      stbtt_uint32 search = endCount;
+
+      if (unicode_codepoint > 0xffff)
+         return 0;
+
+      // they lie from endCount .. endCount + segCount
+      // but searchRange is the nearest power of two, so...
+      if (unicode_codepoint >= ttUSHORT(data + search + rangeShift*2))
+         search += rangeShift*2;
+
+      // now decrement to bias correctly to find smallest
+      search -= 2;
+      while (entrySelector) {
+         stbtt_uint16 end;
+         searchRange >>= 1;
+         end = ttUSHORT(data + search + searchRange*2);
+         if (unicode_codepoint > end)
+            search += searchRange*2;
+         --entrySelector;
+      }
+      search += 2;
+
+      {
+         stbtt_uint16 offset, start, last;
+         stbtt_uint16 item = (stbtt_uint16) ((search - endCount) >> 1);
+
+         start = ttUSHORT(data + index_map + 14 + segcount*2 + 2 + 2*item);
+         last = ttUSHORT(data + endCount + 2*item);
+         if (unicode_codepoint < start || unicode_codepoint > last)
+            return 0;
+
+         offset = ttUSHORT(data + index_map + 14 + segcount*6 + 2 + 2*item);
+         if (offset == 0)
+            return (stbtt_uint16) (unicode_codepoint + ttSHORT(data + index_map + 14 + segcount*4 + 2 + 2*item));
+
+         return ttUSHORT(data + offset + (unicode_codepoint-start)*2 + index_map + 14 + segcount*6 + 2 + 2*item);
+      }
+   } else if (format == 12 || format == 13) {
+      stbtt_uint32 ngroups = ttULONG(data+index_map+12);
+      stbtt_int32 low,high;
+      low = 0; high = (stbtt_int32)ngroups;
+      // Binary search the right group.
+      while (low < high) {
+         stbtt_int32 mid = low + ((high-low) >> 1); // rounds down, so low <= mid < high
+         stbtt_uint32 start_char = ttULONG(data+index_map+16+mid*12);
+         stbtt_uint32 end_char = ttULONG(data+index_map+16+mid*12+4);
+         if ((stbtt_uint32) unicode_codepoint < start_char)
+            high = mid;
+         else if ((stbtt_uint32) unicode_codepoint > end_char)
+            low = mid+1;
+         else {
+            stbtt_uint32 start_glyph = ttULONG(data+index_map+16+mid*12+8);
+            if (format == 12)
+               return start_glyph + unicode_codepoint-start_char;
+            else // format == 13
+               return start_glyph;
+         }
+      }
+      return 0; // not found
+   }
+   // @TODO
+   STBTT_assert(0);
+   return 0;
+}
+
+STBTT_DEF int stbtt_GetCodepointShape(const stbtt_fontinfo *info, int unicode_codepoint, stbtt_vertex **vertices)
+{
+   return stbtt_GetGlyphShape(info, stbtt_FindGlyphIndex(info, unicode_codepoint), vertices);
+}
+
+static void stbtt_setvertex(stbtt_vertex *v, stbtt_uint8 type, stbtt_int32 x, stbtt_int32 y, stbtt_int32 cx, stbtt_int32 cy)
+{
+   v->type = type;
+   v->x = (stbtt_int16) x;
+   v->y = (stbtt_int16) y;
+   v->cx = (stbtt_int16) cx;
+   v->cy = (stbtt_int16) cy;
+}
+
+static int stbtt__GetGlyfOffset(const stbtt_fontinfo *info, int glyph_index)
+{
+   int g1,g2;
+
+   STBTT_assert(!info->cff.size);
+
+   if (glyph_index >= info->numGlyphs) return -1; // glyph index out of range
+   if (info->indexToLocFormat >= 2)    return -1; // unknown index->glyph map format
+
+   if (info->indexToLocFormat == 0) {
+      g1 = info->glyf + ttUSHORT(info->data + info->loca + glyph_index * 2) * 2;
+      g2 = info->glyf + ttUSHORT(info->data + info->loca + glyph_index * 2 + 2) * 2;
+   } else {
+      g1 = info->glyf + ttULONG (info->data + info->loca + glyph_index * 4);
+      g2 = info->glyf + ttULONG (info->data + info->loca + glyph_index * 4 + 4);
+   }
+
+   return g1==g2 ? -1 : g1; // if length is 0, return -1
+}
+
+static int stbtt__GetGlyphInfoT2(const stbtt_fontinfo *info, int glyph_index, int *x0, int *y0, int *x1, int *y1);
+
+STBTT_DEF int stbtt_GetGlyphBox(const stbtt_fontinfo *info, int glyph_index, int *x0, int *y0, int *x1, int *y1)
+{
+   if (info->cff.size) {
+      stbtt__GetGlyphInfoT2(info, glyph_index, x0, y0, x1, y1);
+   } else {
+      int g = stbtt__GetGlyfOffset(info, glyph_index);
+      if (g < 0) return 0;
+
+      if (x0) *x0 = ttSHORT(info->data + g + 2);
+      if (y0) *y0 = ttSHORT(info->data + g + 4);
+      if (x1) *x1 = ttSHORT(info->data + g + 6);
+      if (y1) *y1 = ttSHORT(info->data + g + 8);
+   }
+   return 1;
+}
+
+STBTT_DEF int stbtt_GetCodepointBox(const stbtt_fontinfo *info, int codepoint, int *x0, int *y0, int *x1, int *y1)
+{
+   return stbtt_GetGlyphBox(info, stbtt_FindGlyphIndex(info,codepoint), x0,y0,x1,y1);
+}
+
+STBTT_DEF int stbtt_IsGlyphEmpty(const stbtt_fontinfo *info, int glyph_index)
+{
+   stbtt_int16 numberOfContours;
+   int g;
+   if (info->cff.size)
+      return stbtt__GetGlyphInfoT2(info, glyph_index, NULL, NULL, NULL, NULL) == 0;
+   g = stbtt__GetGlyfOffset(info, glyph_index);
+   if (g < 0) return 1;
+   numberOfContours = ttSHORT(info->data + g);
+   return numberOfContours == 0;
+}
+
+static int stbtt__close_shape(stbtt_vertex *vertices, int num_vertices, int was_off, int start_off,
+    stbtt_int32 sx, stbtt_int32 sy, stbtt_int32 scx, stbtt_int32 scy, stbtt_int32 cx, stbtt_int32 cy)
+{
+   if (start_off) {
+      if (was_off)
+         stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve, (cx+scx)>>1, (cy+scy)>>1, cx,cy);
+      stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve, sx,sy,scx,scy);
+   } else {
+      if (was_off)
+         stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve,sx,sy,cx,cy);
+      else
+         stbtt_setvertex(&vertices[num_vertices++], STBTT_vline,sx,sy,0,0);
+   }
+   return num_vertices;
+}
+
+static int stbtt__GetGlyphShapeTT(const stbtt_fontinfo *info, int glyph_index, stbtt_vertex **pvertices)
+{
+   stbtt_int16 numberOfContours;
+   stbtt_uint8 *endPtsOfContours;
+   stbtt_uint8 *data = info->data;
+   stbtt_vertex *vertices=0;
+   int num_vertices=0;
+   int g = stbtt__GetGlyfOffset(info, glyph_index);
+
+   *pvertices = NULL;
+
+   if (g < 0) return 0;
+
+   numberOfContours = ttSHORT(data + g);
+
+   if (numberOfContours > 0) {
+      stbtt_uint8 flags=0,flagcount;
+      stbtt_int32 ins, i,j=0,m,n, next_move, was_off=0, off, start_off=0;
+      stbtt_int32 x,y,cx,cy,sx,sy, scx,scy;
+      stbtt_uint8 *points;
+      endPtsOfContours = (data + g + 10);
+      ins = ttUSHORT(data + g + 10 + numberOfContours * 2);
+      points = data + g + 10 + numberOfContours * 2 + 2 + ins;
+
+      n = 1+ttUSHORT(endPtsOfContours + numberOfContours*2-2);
+
+      m = n + 2*numberOfContours;  // a loose bound on how many vertices we might need
+      vertices = (stbtt_vertex *) STBTT_malloc(m * sizeof(vertices[0]), info->userdata);
+      if (vertices == 0)
+         return 0;
+
+      next_move = 0;
+      flagcount=0;
+
+      // in first pass, we load uninterpreted data into the allocated array
+      // above, shifted to the end of the array so we won't overwrite it when
+      // we create our final data starting from the front
+
+      off = m - n; // starting offset for uninterpreted data, regardless of how m ends up being calculated
+
+      // first load flags
+
+      for (i=0; i < n; ++i) {
+         if (flagcount == 0) {
+            flags = *points++;
+            if (flags & 8)
+               flagcount = *points++;
+         } else
+            --flagcount;
+         vertices[off+i].type = flags;
+      }
+
+      // now load x coordinates
+      x=0;
+      for (i=0; i < n; ++i) {
+         flags = vertices[off+i].type;
+         if (flags & 2) {
+            stbtt_int16 dx = *points++;
+            x += (flags & 16) ? dx : -dx; // ???
+         } else {
+            if (!(flags & 16)) {
+               x = x + (stbtt_int16) (points[0]*256 + points[1]);
+               points += 2;
+            }
+         }
+         vertices[off+i].x = (stbtt_int16) x;
+      }
+
+      // now load y coordinates
+      y=0;
+      for (i=0; i < n; ++i) {
+         flags = vertices[off+i].type;
+         if (flags & 4) {
+            stbtt_int16 dy = *points++;
+            y += (flags & 32) ? dy : -dy; // ???
+         } else {
+            if (!(flags & 32)) {
+               y = y + (stbtt_int16) (points[0]*256 + points[1]);
+               points += 2;
+            }
+         }
+         vertices[off+i].y = (stbtt_int16) y;
+      }
+
+      // now convert them to our format
+      num_vertices=0;
+      sx = sy = cx = cy = scx = scy = 0;
+      for (i=0; i < n; ++i) {
+         flags = vertices[off+i].type;
+         x     = (stbtt_int16) vertices[off+i].x;
+         y     = (stbtt_int16) vertices[off+i].y;
+
+         if (next_move == i) {
+            if (i != 0)
+               num_vertices = stbtt__close_shape(vertices, num_vertices, was_off, start_off, sx,sy,scx,scy,cx,cy);
+
+            // now start the new one
+            start_off = !(flags & 1);
+            if (start_off) {
+               // if we start off with an off-curve point, then when we need to find a point on the curve
+               // where we can start, and we need to save some state for when we wraparound.
+               scx = x;
+               scy = y;
+               if (!(vertices[off+i+1].type & 1)) {
+                  // next point is also a curve point, so interpolate an on-point curve
+                  sx = (x + (stbtt_int32) vertices[off+i+1].x) >> 1;
+                  sy = (y + (stbtt_int32) vertices[off+i+1].y) >> 1;
+               } else {
+                  // otherwise just use the next point as our start point
+                  sx = (stbtt_int32) vertices[off+i+1].x;
+                  sy = (stbtt_int32) vertices[off+i+1].y;
+                  ++i; // we're using point i+1 as the starting point, so skip it
+               }
+            } else {
+               sx = x;
+               sy = y;
+            }
+            stbtt_setvertex(&vertices[num_vertices++], STBTT_vmove,sx,sy,0,0);
+            was_off = 0;
+            next_move = 1 + ttUSHORT(endPtsOfContours+j*2);
+            ++j;
+         } else {
+            if (!(flags & 1)) { // if it's a curve
+               if (was_off) // two off-curve control points in a row means interpolate an on-curve midpoint
+                  stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve, (cx+x)>>1, (cy+y)>>1, cx, cy);
+               cx = x;
+               cy = y;
+               was_off = 1;
+            } else {
+               if (was_off)
+                  stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve, x,y, cx, cy);
+               else
+                  stbtt_setvertex(&vertices[num_vertices++], STBTT_vline, x,y,0,0);
+               was_off = 0;
+            }
+         }
+      }
+      num_vertices = stbtt__close_shape(vertices, num_vertices, was_off, start_off, sx,sy,scx,scy,cx,cy);
+   } else if (numberOfContours < 0) {
+      // Compound shapes.
+      int more = 1;
+      stbtt_uint8 *comp = data + g + 10;
+      num_vertices = 0;
+      vertices = 0;
+      while (more) {
+         stbtt_uint16 flags, gidx;
+         int comp_num_verts = 0, i;
+         stbtt_vertex *comp_verts = 0, *tmp = 0;
+         float mtx[6] = {1,0,0,1,0,0}, m, n;
+
+         flags = ttSHORT(comp); comp+=2;
+         gidx = ttSHORT(comp); comp+=2;
+
+         if (flags & 2) { // XY values
+            if (flags & 1) { // shorts
+               mtx[4] = ttSHORT(comp); comp+=2;
+               mtx[5] = ttSHORT(comp); comp+=2;
+            } else {
+               mtx[4] = ttCHAR(comp); comp+=1;
+               mtx[5] = ttCHAR(comp); comp+=1;
+            }
+         }
+         else {
+            // @TODO handle matching point
+            STBTT_assert(0);
+         }
+         if (flags & (1<<3)) { // WE_HAVE_A_SCALE
+            mtx[0] = mtx[3] = ttSHORT(comp)/16384.0f; comp+=2;
+            mtx[1] = mtx[2] = 0;
+         } else if (flags & (1<<6)) { // WE_HAVE_AN_X_AND_YSCALE
+            mtx[0] = ttSHORT(comp)/16384.0f; comp+=2;
+            mtx[1] = mtx[2] = 0;
+            mtx[3] = ttSHORT(comp)/16384.0f; comp+=2;
+         } else if (flags & (1<<7)) { // WE_HAVE_A_TWO_BY_TWO
+            mtx[0] = ttSHORT(comp)/16384.0f; comp+=2;
+            mtx[1] = ttSHORT(comp)/16384.0f; comp+=2;
+            mtx[2] = ttSHORT(comp)/16384.0f; comp+=2;
+            mtx[3] = ttSHORT(comp)/16384.0f; comp+=2;
+         }
+
+         // Find transformation scales.
+         m = (float) STBTT_sqrt(mtx[0]*mtx[0] + mtx[1]*mtx[1]);
+         n = (float) STBTT_sqrt(mtx[2]*mtx[2] + mtx[3]*mtx[3]);
+
+         // Get indexed glyph.
+         comp_num_verts = stbtt_GetGlyphShape(info, gidx, &comp_verts);
+         if (comp_num_verts > 0) {
+            // Transform vertices.
+            for (i = 0; i < comp_num_verts; ++i) {
+               stbtt_vertex* v = &comp_verts[i];
+               stbtt_vertex_type x,y;
+               x=v->x; y=v->y;
+               v->x = (stbtt_vertex_type)(m * (mtx[0]*x + mtx[2]*y + mtx[4]));
+               v->y = (stbtt_vertex_type)(n * (mtx[1]*x + mtx[3]*y + mtx[5]));
+               x=v->cx; y=v->cy;
+               v->cx = (stbtt_vertex_type)(m * (mtx[0]*x + mtx[2]*y + mtx[4]));
+               v->cy = (stbtt_vertex_type)(n * (mtx[1]*x + mtx[3]*y + mtx[5]));
+            }
+            // Append vertices.
+            tmp = (stbtt_vertex*)STBTT_malloc((num_vertices+comp_num_verts)*sizeof(stbtt_vertex), info->userdata);
+            if (!tmp) {
+               if (vertices) STBTT_free(vertices, info->userdata);
+               if (comp_verts) STBTT_free(comp_verts, info->userdata);
+               return 0;
+            }
+            if (num_vertices > 0 && vertices) STBTT_memcpy(tmp, vertices, num_vertices*sizeof(stbtt_vertex));
+            STBTT_memcpy(tmp+num_vertices, comp_verts, comp_num_verts*sizeof(stbtt_vertex));
+            if (vertices) STBTT_free(vertices, info->userdata);
+            vertices = tmp;
+            STBTT_free(comp_verts, info->userdata);
+            num_vertices += comp_num_verts;
+         }
+         // More components ?
+         more = flags & (1<<5);
+      }
+   } else {
+      // numberOfCounters == 0, do nothing
+   }
+
+   *pvertices = vertices;
+   return num_vertices;
+}
+
+typedef struct
+{
+   int bounds;
+   int started;
+   float first_x, first_y;
+   float x, y;
+   stbtt_int32 min_x, max_x, min_y, max_y;
+
+   stbtt_vertex *pvertices;
+   int num_vertices;
+} stbtt__csctx;
+
+#define STBTT__CSCTX_INIT(bounds) {bounds,0, 0,0, 0,0, 0,0,0,0, NULL, 0}
+
+static void stbtt__track_vertex(stbtt__csctx *c, stbtt_int32 x, stbtt_int32 y)
+{
+   if (x > c->max_x || !c->started) c->max_x = x;
+   if (y > c->max_y || !c->started) c->max_y = y;
+   if (x < c->min_x || !c->started) c->min_x = x;
+   if (y < c->min_y || !c->started) c->min_y = y;
+   c->started = 1;
+}
+
+static void stbtt__csctx_v(stbtt__csctx *c, stbtt_uint8 type, stbtt_int32 x, stbtt_int32 y, stbtt_int32 cx, stbtt_int32 cy, stbtt_int32 cx1, stbtt_int32 cy1)
+{
+   if (c->bounds) {
+      stbtt__track_vertex(c, x, y);
+      if (type == STBTT_vcubic) {
+         stbtt__track_vertex(c, cx, cy);
+         stbtt__track_vertex(c, cx1, cy1);
+      }
+   } else {
+      stbtt_setvertex(&c->pvertices[c->num_vertices], type, x, y, cx, cy);
+      c->pvertices[c->num_vertices].cx1 = (stbtt_int16) cx1;
+      c->pvertices[c->num_vertices].cy1 = (stbtt_int16) cy1;
+   }
+   c->num_vertices++;
+}
+
+static void stbtt__csctx_close_shape(stbtt__csctx *ctx)
+{
+   if (ctx->first_x != ctx->x || ctx->first_y != ctx->y)
+      stbtt__csctx_v(ctx, STBTT_vline, (int)ctx->first_x, (int)ctx->first_y, 0, 0, 0, 0);
+}
+
+static void stbtt__csctx_rmove_to(stbtt__csctx *ctx, float dx, float dy)
+{
+   stbtt__csctx_close_shape(ctx);
+   ctx->first_x = ctx->x = ctx->x + dx;
+   ctx->first_y = ctx->y = ctx->y + dy;
+   stbtt__csctx_v(ctx, STBTT_vmove, (int)ctx->x, (int)ctx->y, 0, 0, 0, 0);
+}
+
+static void stbtt__csctx_rline_to(stbtt__csctx *ctx, float dx, float dy)
+{
+   ctx->x += dx;
+   ctx->y += dy;
+   stbtt__csctx_v(ctx, STBTT_vline, (int)ctx->x, (int)ctx->y, 0, 0, 0, 0);
+}
+
+static void stbtt__csctx_rccurve_to(stbtt__csctx *ctx, float dx1, float dy1, float dx2, float dy2, float dx3, float dy3)
+{
+   float cx1 = ctx->x + dx1;
+   float cy1 = ctx->y + dy1;
+   float cx2 = cx1 + dx2;
+   float cy2 = cy1 + dy2;
+   ctx->x = cx2 + dx3;
+   ctx->y = cy2 + dy3;
+   stbtt__csctx_v(ctx, STBTT_vcubic, (int)ctx->x, (int)ctx->y, (int)cx1, (int)cy1, (int)cx2, (int)cy2);
+}
+
+static stbtt__buf stbtt__get_subr(stbtt__buf idx, int n)
+{
+   int count = stbtt__cff_index_count(&idx);
+   int bias = 107;
+   if (count >= 33900)
+      bias = 32768;
+   else if (count >= 1240)
+      bias = 1131;
+   n += bias;
+   if (n < 0 || n >= count)
+      return stbtt__new_buf(NULL, 0);
+   return stbtt__cff_index_get(idx, n);
+}
+
+static stbtt__buf stbtt__cid_get_glyph_subrs(const stbtt_fontinfo *info, int glyph_index)
+{
+   stbtt__buf fdselect = info->fdselect;
+   int nranges, start, end, v, fmt, fdselector = -1, i;
+
+   stbtt__buf_seek(&fdselect, 0);
+   fmt = stbtt__buf_get8(&fdselect);
+   if (fmt == 0) {
+      // untested
+      stbtt__buf_skip(&fdselect, glyph_index);
+      fdselector = stbtt__buf_get8(&fdselect);
+   } else if (fmt == 3) {
+      nranges = stbtt__buf_get16(&fdselect);
+      start = stbtt__buf_get16(&fdselect);
+      for (i = 0; i < nranges; i++) {
+         v = stbtt__buf_get8(&fdselect);
+         end = stbtt__buf_get16(&fdselect);
+         if (glyph_index >= start && glyph_index < end) {
+            fdselector = v;
+            break;
+         }
+         start = end;
+      }
+   }
+   if (fdselector == -1) stbtt__new_buf(NULL, 0);
+   return stbtt__get_subrs(info->cff, stbtt__cff_index_get(info->fontdicts, fdselector));
+}
+
+static int stbtt__run_charstring(const stbtt_fontinfo *info, int glyph_index, stbtt__csctx *c)
+{
+   int in_header = 1, maskbits = 0, subr_stack_height = 0, sp = 0, v, i, b0;
+   int has_subrs = 0, clear_stack;
+   float s[48];
+   stbtt__buf subr_stack[10], subrs = info->subrs, b;
+   float f;
+
+#define STBTT__CSERR(s) (0)
+
+   // this currently ignores the initial width value, which isn't needed if we have hmtx
+   b = stbtt__cff_index_get(info->charstrings, glyph_index);
+   while (b.cursor < b.size) {
+      i = 0;
+      clear_stack = 1;
+      b0 = stbtt__buf_get8(&b);
+      switch (b0) {
+      // @TODO implement hinting
+      case 0x13: // hintmask
+      case 0x14: // cntrmask
+         if (in_header)
+            maskbits += (sp / 2); // implicit "vstem"
+         in_header = 0;
+         stbtt__buf_skip(&b, (maskbits + 7) / 8);
+         break;
+
+      case 0x01: // hstem
+      case 0x03: // vstem
+      case 0x12: // hstemhm
+      case 0x17: // vstemhm
+         maskbits += (sp / 2);
+         break;
+
+      case 0x15: // rmoveto
+         in_header = 0;
+         if (sp < 2) return STBTT__CSERR("rmoveto stack");
+         stbtt__csctx_rmove_to(c, s[sp-2], s[sp-1]);
+         break;
+      case 0x04: // vmoveto
+         in_header = 0;
+         if (sp < 1) return STBTT__CSERR("vmoveto stack");
+         stbtt__csctx_rmove_to(c, 0, s[sp-1]);
+         break;
+      case 0x16: // hmoveto
+         in_header = 0;
+         if (sp < 1) return STBTT__CSERR("hmoveto stack");
+         stbtt__csctx_rmove_to(c, s[sp-1], 0);
+         break;
+
+      case 0x05: // rlineto
+         if (sp < 2) return STBTT__CSERR("rlineto stack");
+         for (; i + 1 < sp; i += 2)
+            stbtt__csctx_rline_to(c, s[i], s[i+1]);
+         break;
+
+      // hlineto/vlineto and vhcurveto/hvcurveto alternate horizontal and vertical
+      // starting from a different place.
+
+      case 0x07: // vlineto
+         if (sp < 1) return STBTT__CSERR("vlineto stack");
+         goto vlineto;
+      case 0x06: // hlineto
+         if (sp < 1) return STBTT__CSERR("hlineto stack");
+         for (;;) {
+            if (i >= sp) break;
+            stbtt__csctx_rline_to(c, s[i], 0);
+            i++;
+      vlineto:
+            if (i >= sp) break;
+            stbtt__csctx_rline_to(c, 0, s[i]);
+            i++;
+         }
+         break;
+
+      case 0x1F: // hvcurveto
+         if (sp < 4) return STBTT__CSERR("hvcurveto stack");
+         goto hvcurveto;
+      case 0x1E: // vhcurveto
+         if (sp < 4) return STBTT__CSERR("vhcurveto stack");
+         for (;;) {
+            if (i + 3 >= sp) break;
+            stbtt__csctx_rccurve_to(c, 0, s[i], s[i+1], s[i+2], s[i+3], (sp - i == 5) ? s[i + 4] : 0.0f);
+            i += 4;
+      hvcurveto:
+            if (i + 3 >= sp) break;
+            stbtt__csctx_rccurve_to(c, s[i], 0, s[i+1], s[i+2], (sp - i == 5) ? s[i+4] : 0.0f, s[i+3]);
+            i += 4;
+         }
+         break;
+
+      case 0x08: // rrcurveto
+         if (sp < 6) return STBTT__CSERR("rcurveline stack");
+         for (; i + 5 < sp; i += 6)
+            stbtt__csctx_rccurve_to(c, s[i], s[i+1], s[i+2], s[i+3], s[i+4], s[i+5]);
+         break;
+
+      case 0x18: // rcurveline
+         if (sp < 8) return STBTT__CSERR("rcurveline stack");
+         for (; i + 5 < sp - 2; i += 6)
+            stbtt__csctx_rccurve_to(c, s[i], s[i+1], s[i+2], s[i+3], s[i+4], s[i+5]);
+         if (i + 1 >= sp) return STBTT__CSERR("rcurveline stack");
+         stbtt__csctx_rline_to(c, s[i], s[i+1]);
+         break;
+
+      case 0x19: // rlinecurve
+         if (sp < 8) return STBTT__CSERR("rlinecurve stack");
+         for (; i + 1 < sp - 6; i += 2)
+            stbtt__csctx_rline_to(c, s[i], s[i+1]);
+         if (i + 5 >= sp) return STBTT__CSERR("rlinecurve stack");
+         stbtt__csctx_rccurve_to(c, s[i], s[i+1], s[i+2], s[i+3], s[i+4], s[i+5]);
+         break;
+
+      case 0x1A: // vvcurveto
+      case 0x1B: // hhcurveto
+         if (sp < 4) return STBTT__CSERR("(vv|hh)curveto stack");
+         f = 0.0;
+         if (sp & 1) { f = s[i]; i++; }
+         for (; i + 3 < sp; i += 4) {
+            if (b0 == 0x1B)
+               stbtt__csctx_rccurve_to(c, s[i], f, s[i+1], s[i+2], s[i+3], 0.0);
+            else
+               stbtt__csctx_rccurve_to(c, f, s[i], s[i+1], s[i+2], 0.0, s[i+3]);
+            f = 0.0;
+         }
+         break;
+
+      case 0x0A: // callsubr
+         if (!has_subrs) {
+            if (info->fdselect.size)
+               subrs = stbtt__cid_get_glyph_subrs(info, glyph_index);
+            has_subrs = 1;
+         }
+         // FALLTHROUGH
+      case 0x1D: // callgsubr
+         if (sp < 1) return STBTT__CSERR("call(g|)subr stack");
+         v = (int) s[--sp];
+         if (subr_stack_height >= 10) return STBTT__CSERR("recursion limit");
+         subr_stack[subr_stack_height++] = b;
+         b = stbtt__get_subr(b0 == 0x0A ? subrs : info->gsubrs, v);
+         if (b.size == 0) return STBTT__CSERR("subr not found");
+         b.cursor = 0;
+         clear_stack = 0;
+         break;
+
+      case 0x0B: // return
+         if (subr_stack_height <= 0) return STBTT__CSERR("return outside subr");
+         b = subr_stack[--subr_stack_height];
+         clear_stack = 0;
+         break;
+
+      case 0x0E: // endchar
+         stbtt__csctx_close_shape(c);
+         return 1;
+
+      case 0x0C: { // two-byte escape
+         float dx1, dx2, dx3, dx4, dx5, dx6, dy1, dy2, dy3, dy4, dy5, dy6;
+         float dx, dy;
+         int b1 = stbtt__buf_get8(&b);
+         switch (b1) {
+         // @TODO These "flex" implementations ignore the flex-depth and resolution,
+         // and always draw beziers.
+         case 0x22: // hflex
+            if (sp < 7) return STBTT__CSERR("hflex stack");
+            dx1 = s[0];
+            dx2 = s[1];
+            dy2 = s[2];
+            dx3 = s[3];
+            dx4 = s[4];
+            dx5 = s[5];
+            dx6 = s[6];
+            stbtt__csctx_rccurve_to(c, dx1, 0, dx2, dy2, dx3, 0);
+            stbtt__csctx_rccurve_to(c, dx4, 0, dx5, -dy2, dx6, 0);
+            break;
+
+         case 0x23: // flex
+            if (sp < 13) return STBTT__CSERR("flex stack");
+            dx1 = s[0];
+            dy1 = s[1];
+            dx2 = s[2];
+            dy2 = s[3];
+            dx3 = s[4];
+            dy3 = s[5];
+            dx4 = s[6];
+            dy4 = s[7];
+            dx5 = s[8];
+            dy5 = s[9];
+            dx6 = s[10];
+            dy6 = s[11];
+            //fd is s[12]
+            stbtt__csctx_rccurve_to(c, dx1, dy1, dx2, dy2, dx3, dy3);
+            stbtt__csctx_rccurve_to(c, dx4, dy4, dx5, dy5, dx6, dy6);
+            break;
+
+         case 0x24: // hflex1
+            if (sp < 9) return STBTT__CSERR("hflex1 stack");
+            dx1 = s[0];
+            dy1 = s[1];
+            dx2 = s[2];
+            dy2 = s[3];
+            dx3 = s[4];
+            dx4 = s[5];
+            dx5 = s[6];
+            dy5 = s[7];
+            dx6 = s[8];
+            stbtt__csctx_rccurve_to(c, dx1, dy1, dx2, dy2, dx3, 0);
+            stbtt__csctx_rccurve_to(c, dx4, 0, dx5, dy5, dx6, -(dy1+dy2+dy5));
+            break;
+
+         case 0x25: // flex1
+            if (sp < 11) return STBTT__CSERR("flex1 stack");
+            dx1 = s[0];
+            dy1 = s[1];
+            dx2 = s[2];
+            dy2 = s[3];
+            dx3 = s[4];
+            dy3 = s[5];
+            dx4 = s[6];
+            dy4 = s[7];
+            dx5 = s[8];
+            dy5 = s[9];
+            dx6 = dy6 = s[10];
+            dx = dx1+dx2+dx3+dx4+dx5;
+            dy = dy1+dy2+dy3+dy4+dy5;
+            if (STBTT_fabs(dx) > STBTT_fabs(dy))
+               dy6 = -dy;
+            else
+               dx6 = -dx;
+            stbtt__csctx_rccurve_to(c, dx1, dy1, dx2, dy2, dx3, dy3);
+            stbtt__csctx_rccurve_to(c, dx4, dy4, dx5, dy5, dx6, dy6);
+            break;
+
+         default:
+            return STBTT__CSERR("unimplemented");
+         }
+      } break;
+
+      default:
+         if (b0 != 255 && b0 != 28 && b0 < 32)
+            return STBTT__CSERR("reserved operator");
+
+         // push immediate
+         if (b0 == 255) {
+            f = (float)(stbtt_int32)stbtt__buf_get32(&b) / 0x10000;
+         } else {
+            stbtt__buf_skip(&b, -1);
+            f = (float)(stbtt_int16)stbtt__cff_int(&b);
+         }
+         if (sp >= 48) return STBTT__CSERR("push stack overflow");
+         s[sp++] = f;
+         clear_stack = 0;
+         break;
+      }
+      if (clear_stack) sp = 0;
+   }
+   return STBTT__CSERR("no endchar");
+
+#undef STBTT__CSERR
+}
+
+static int stbtt__GetGlyphShapeT2(const stbtt_fontinfo *info, int glyph_index, stbtt_vertex **pvertices)
+{
+   // runs the charstring twice, once to count and once to output (to avoid realloc)
+   stbtt__csctx count_ctx = STBTT__CSCTX_INIT(1);
+   stbtt__csctx output_ctx = STBTT__CSCTX_INIT(0);
+   if (stbtt__run_charstring(info, glyph_index, &count_ctx)) {
+      *pvertices = (stbtt_vertex*)STBTT_malloc(count_ctx.num_vertices*sizeof(stbtt_vertex), info->userdata);
+      output_ctx.pvertices = *pvertices;
+      if (stbtt__run_charstring(info, glyph_index, &output_ctx)) {
+         STBTT_assert(output_ctx.num_vertices == count_ctx.num_vertices);
+         return output_ctx.num_vertices;
+      }
+   }
+   *pvertices = NULL;
+   return 0;
+}
+
+static int stbtt__GetGlyphInfoT2(const stbtt_fontinfo *info, int glyph_index, int *x0, int *y0, int *x1, int *y1)
+{
+   stbtt__csctx c = STBTT__CSCTX_INIT(1);
+   int r = stbtt__run_charstring(info, glyph_index, &c);
+   if (x0)  *x0 = r ? c.min_x : 0;
+   if (y0)  *y0 = r ? c.min_y : 0;
+   if (x1)  *x1 = r ? c.max_x : 0;
+   if (y1)  *y1 = r ? c.max_y : 0;
+   return r ? c.num_vertices : 0;
+}
+
+STBTT_DEF int stbtt_GetGlyphShape(const stbtt_fontinfo *info, int glyph_index, stbtt_vertex **pvertices)
+{
+   if (!info->cff.size)
+      return stbtt__GetGlyphShapeTT(info, glyph_index, pvertices);
+   else
+      return stbtt__GetGlyphShapeT2(info, glyph_index, pvertices);
+}
+
+STBTT_DEF void stbtt_GetGlyphHMetrics(const stbtt_fontinfo *info, int glyph_index, int *advanceWidth, int *leftSideBearing)
+{
+   stbtt_uint16 numOfLongHorMetrics = ttUSHORT(info->data+info->hhea + 34);
+   if (glyph_index < numOfLongHorMetrics) {
+      if (advanceWidth)     *advanceWidth    = ttSHORT(info->data + info->hmtx + 4*glyph_index);
+      if (leftSideBearing)  *leftSideBearing = ttSHORT(info->data + info->hmtx + 4*glyph_index + 2);
+   } else {
+      if (advanceWidth)     *advanceWidth    = ttSHORT(info->data + info->hmtx + 4*(numOfLongHorMetrics-1));
+      if (leftSideBearing)  *leftSideBearing = ttSHORT(info->data + info->hmtx + 4*numOfLongHorMetrics + 2*(glyph_index - numOfLongHorMetrics));
+   }
+}
+
+STBTT_DEF int  stbtt_GetKerningTableLength(const stbtt_fontinfo *info)
+{
+   stbtt_uint8 *data = info->data + info->kern;
+
+   // we only look at the first table. it must be 'horizontal' and format 0.
+   if (!info->kern)
+      return 0;
+   if (ttUSHORT(data+2) < 1) // number of tables, need at least 1
+      return 0;
+   if (ttUSHORT(data+8) != 1) // horizontal flag must be set in format
+      return 0;
+
+   return ttUSHORT(data+10);
+}
+
+STBTT_DEF int stbtt_GetKerningTable(const stbtt_fontinfo *info, stbtt_kerningentry* table, int table_length)
+{
+   stbtt_uint8 *data = info->data + info->kern;
+   int k, length;
+
+   // we only look at the first table. it must be 'horizontal' and format 0.
+   if (!info->kern)
+      return 0;
+   if (ttUSHORT(data+2) < 1) // number of tables, need at least 1
+      return 0;
+   if (ttUSHORT(data+8) != 1) // horizontal flag must be set in format
+      return 0;
+
+   length = ttUSHORT(data+10);
+   if (table_length < length)
+      length = table_length;
+
+   for (k = 0; k < length; k++)
+   {
+      table[k].glyph1 = ttUSHORT(data+18+(k*6));
+      table[k].glyph2 = ttUSHORT(data+20+(k*6));
+      table[k].advance = ttSHORT(data+22+(k*6));
+   }
+
+   return length;
+}
+
+static int stbtt__GetGlyphKernInfoAdvance(const stbtt_fontinfo *info, int glyph1, int glyph2)
+{
+   stbtt_uint8 *data = info->data + info->kern;
+   stbtt_uint32 needle, straw;
+   int l, r, m;
+
+   // we only look at the first table. it must be 'horizontal' and format 0.
+   if (!info->kern)
+      return 0;
+   if (ttUSHORT(data+2) < 1) // number of tables, need at least 1
+      return 0;
+   if (ttUSHORT(data+8) != 1) // horizontal flag must be set in format
+      return 0;
+
+   l = 0;
+   r = ttUSHORT(data+10) - 1;
+   needle = glyph1 << 16 | glyph2;
+   while (l <= r) {
+      m = (l + r) >> 1;
+      straw = ttULONG(data+18+(m*6)); // note: unaligned read
+      if (needle < straw)
+         r = m - 1;
+      else if (needle > straw)
+         l = m + 1;
+      else
+         return ttSHORT(data+22+(m*6));
+   }
+   return 0;
+}
+
+static stbtt_int32 stbtt__GetCoverageIndex(stbtt_uint8 *coverageTable, int glyph)
+{
+   stbtt_uint16 coverageFormat = ttUSHORT(coverageTable);
+   switch (coverageFormat) {
+      case 1: {
+         stbtt_uint16 glyphCount = ttUSHORT(coverageTable + 2);
+
+         // Binary search.
+         stbtt_int32 l=0, r=glyphCount-1, m;
+         int straw, needle=glyph;
+         while (l <= r) {
+            stbtt_uint8 *glyphArray = coverageTable + 4;
+            stbtt_uint16 glyphID;
+            m = (l + r) >> 1;
+            glyphID = ttUSHORT(glyphArray + 2 * m);
+            straw = glyphID;
+            if (needle < straw)
+               r = m - 1;
+            else if (needle > straw)
+               l = m + 1;
+            else {
+               return m;
+            }
+         }
+         break;
+      }
+
+      case 2: {
+         stbtt_uint16 rangeCount = ttUSHORT(coverageTable + 2);
+         stbtt_uint8 *rangeArray = coverageTable + 4;
+
+         // Binary search.
+         stbtt_int32 l=0, r=rangeCount-1, m;
+         int strawStart, strawEnd, needle=glyph;
+         while (l <= r) {
+            stbtt_uint8 *rangeRecord;
+            m = (l + r) >> 1;
+            rangeRecord = rangeArray + 6 * m;
+            strawStart = ttUSHORT(rangeRecord);
+            strawEnd = ttUSHORT(rangeRecord + 2);
+            if (needle < strawStart)
+               r = m - 1;
+            else if (needle > strawEnd)
+               l = m + 1;
+            else {
+               stbtt_uint16 startCoverageIndex = ttUSHORT(rangeRecord + 4);
+               return startCoverageIndex + glyph - strawStart;
+            }
+         }
+         break;
+      }
+
+      default: return -1; // unsupported
+   }
+
+   return -1;
+}
+
+static stbtt_int32  stbtt__GetGlyphClass(stbtt_uint8 *classDefTable, int glyph)
+{
+   stbtt_uint16 classDefFormat = ttUSHORT(classDefTable);
+   switch (classDefFormat)
+   {
+      case 1: {
+         stbtt_uint16 startGlyphID = ttUSHORT(classDefTable + 2);
+         stbtt_uint16 glyphCount = ttUSHORT(classDefTable + 4);
+         stbtt_uint8 *classDef1ValueArray = classDefTable + 6;
+
+         if (glyph >= startGlyphID && glyph < startGlyphID + glyphCount)
+            return (stbtt_int32)ttUSHORT(classDef1ValueArray + 2 * (glyph - startGlyphID));
+         break;
+      }
+
+      case 2: {
+         stbtt_uint16 classRangeCount = ttUSHORT(classDefTable + 2);
+         stbtt_uint8 *classRangeRecords = classDefTable + 4;
+
+         // Binary search.
+         stbtt_int32 l=0, r=classRangeCount-1, m;
+         int strawStart, strawEnd, needle=glyph;
+         while (l <= r) {
+            stbtt_uint8 *classRangeRecord;
+            m = (l + r) >> 1;
+            classRangeRecord = classRangeRecords + 6 * m;
+            strawStart = ttUSHORT(classRangeRecord);
+            strawEnd = ttUSHORT(classRangeRecord + 2);
+            if (needle < strawStart)
+               r = m - 1;
+            else if (needle > strawEnd)
+               l = m + 1;
+            else
+               return (stbtt_int32)ttUSHORT(classRangeRecord + 4);
+         }
+         break;
+      }
+
+      default:
+         return -1; // Unsupported definition type, return an error.
+   }
+
+   // "All glyphs not assigned to a class fall into class 0". (OpenType spec)
+   return 0;
+}
+
+// Define to STBTT_assert(x) if you want to break on unimplemented formats.
+#define STBTT_GPOS_TODO_assert(x)
+
+static stbtt_int32 stbtt__GetGlyphGPOSInfoAdvance(const stbtt_fontinfo *info, int glyph1, int glyph2)
+{
+   stbtt_uint16 lookupListOffset;
+   stbtt_uint8 *lookupList;
+   stbtt_uint16 lookupCount;
+   stbtt_uint8 *data;
+   stbtt_int32 i, sti;
+
+   if (!info->gpos) return 0;
+
+   data = info->data + info->gpos;
+
+   if (ttUSHORT(data+0) != 1) return 0; // Major version 1
+   if (ttUSHORT(data+2) != 0) return 0; // Minor version 0
+
+   lookupListOffset = ttUSHORT(data+8);
+   lookupList = data + lookupListOffset;
+   lookupCount = ttUSHORT(lookupList);
+
+   for (i=0; i<lookupCount; ++i) {
+      stbtt_uint16 lookupOffset = ttUSHORT(lookupList + 2 + 2 * i);
+      stbtt_uint8 *lookupTable = lookupList + lookupOffset;
+
+      stbtt_uint16 lookupType = ttUSHORT(lookupTable);
+      stbtt_uint16 subTableCount = ttUSHORT(lookupTable + 4);
+      stbtt_uint8 *subTableOffsets = lookupTable + 6;
+      if (lookupType != 2) // Pair Adjustment Positioning Subtable
+         continue;
+
+      for (sti=0; sti<subTableCount; sti++) {
+         stbtt_uint16 subtableOffset = ttUSHORT(subTableOffsets + 2 * sti);
+         stbtt_uint8 *table = lookupTable + subtableOffset;
+         stbtt_uint16 posFormat = ttUSHORT(table);
+         stbtt_uint16 coverageOffset = ttUSHORT(table + 2);
+         stbtt_int32 coverageIndex = stbtt__GetCoverageIndex(table + coverageOffset, glyph1);
+         if (coverageIndex == -1) continue;
+
+         switch (posFormat) {
+            case 1: {
+               stbtt_int32 l, r, m;
+               int straw, needle;
+               stbtt_uint16 valueFormat1 = ttUSHORT(table + 4);
+               stbtt_uint16 valueFormat2 = ttUSHORT(table + 6);
+               if (valueFormat1 == 4 && valueFormat2 == 0) { // Support more formats?
+                  stbtt_int32 valueRecordPairSizeInBytes = 2;
+                  stbtt_uint16 pairSetCount = ttUSHORT(table + 8);
+                  stbtt_uint16 pairPosOffset = ttUSHORT(table + 10 + 2 * coverageIndex);
+                  stbtt_uint8 *pairValueTable = table + pairPosOffset;
+                  stbtt_uint16 pairValueCount = ttUSHORT(pairValueTable);
+                  stbtt_uint8 *pairValueArray = pairValueTable + 2;
+
+                  if (coverageIndex >= pairSetCount) return 0;
+
+                  needle=glyph2;
+                  r=pairValueCount-1;
+                  l=0;
+
+                  // Binary search.
+                  while (l <= r) {
+                     stbtt_uint16 secondGlyph;
+                     stbtt_uint8 *pairValue;
+                     m = (l + r) >> 1;
+                     pairValue = pairValueArray + (2 + valueRecordPairSizeInBytes) * m;
+                     secondGlyph = ttUSHORT(pairValue);
+                     straw = secondGlyph;
+                     if (needle < straw)
+                        r = m - 1;
+                     else if (needle > straw)
+                        l = m + 1;
+                     else {
+                        stbtt_int16 xAdvance = ttSHORT(pairValue + 2);
+                        return xAdvance;
+                     }
+                  }
+               } else
+                  return 0;
+               break;
+            }
+
+            case 2: {
+               stbtt_uint16 valueFormat1 = ttUSHORT(table + 4);
+               stbtt_uint16 valueFormat2 = ttUSHORT(table + 6);
+               if (valueFormat1 == 4 && valueFormat2 == 0) { // Support more formats?
+                  stbtt_uint16 classDef1Offset = ttUSHORT(table + 8);
+                  stbtt_uint16 classDef2Offset = ttUSHORT(table + 10);
+                  int glyph1class = stbtt__GetGlyphClass(table + classDef1Offset, glyph1);
+                  int glyph2class = stbtt__GetGlyphClass(table + classDef2Offset, glyph2);
+
+                  stbtt_uint16 class1Count = ttUSHORT(table + 12);
+                  stbtt_uint16 class2Count = ttUSHORT(table + 14);
+                  stbtt_uint8 *class1Records, *class2Records;
+                  stbtt_int16 xAdvance;
+
+                  if (glyph1class < 0 || glyph1class >= class1Count) return 0; // malformed
+                  if (glyph2class < 0 || glyph2class >= class2Count) return 0; // malformed
+
+                  class1Records = table + 16;
+                  class2Records = class1Records + 2 * (glyph1class * class2Count);
+                  xAdvance = ttSHORT(class2Records + 2 * glyph2class);
+                  return xAdvance;
+               } else
+                  return 0;
+               break;
+            }
+
+            default:
+               return 0; // Unsupported position format
+         }
+      }
+   }
+
+   return 0;
+}
+
+STBTT_DEF int  stbtt_GetGlyphKernAdvance(const stbtt_fontinfo *info, int g1, int g2)
+{
+   int xAdvance = 0;
+
+   if (info->gpos)
+      xAdvance += stbtt__GetGlyphGPOSInfoAdvance(info, g1, g2);
+   else if (info->kern)
+      xAdvance += stbtt__GetGlyphKernInfoAdvance(info, g1, g2);
+
+   return xAdvance;
+}
+
+STBTT_DEF int  stbtt_GetCodepointKernAdvance(const stbtt_fontinfo *info, int ch1, int ch2)
+{
+   if (!info->kern && !info->gpos) // if no kerning table, don't waste time looking up both codepoint->glyphs
+      return 0;
+   return stbtt_GetGlyphKernAdvance(info, stbtt_FindGlyphIndex(info,ch1), stbtt_FindGlyphIndex(info,ch2));
+}
+
+STBTT_DEF void stbtt_GetCodepointHMetrics(const stbtt_fontinfo *info, int codepoint, int *advanceWidth, int *leftSideBearing)
+{
+   stbtt_GetGlyphHMetrics(info, stbtt_FindGlyphIndex(info,codepoint), advanceWidth, leftSideBearing);
+}
+
+STBTT_DEF void stbtt_GetFontVMetrics(const stbtt_fontinfo *info, int *ascent, int *descent, int *lineGap)
+{
+   if (ascent ) *ascent  = ttSHORT(info->data+info->hhea + 4);
+   if (descent) *descent = ttSHORT(info->data+info->hhea + 6);
+   if (lineGap) *lineGap = ttSHORT(info->data+info->hhea + 8);
+}
+
+STBTT_DEF int  stbtt_GetFontVMetricsOS2(const stbtt_fontinfo *info, int *typoAscent, int *typoDescent, int *typoLineGap)
+{
+   int tab = stbtt__find_table(info->data, info->fontstart, "OS/2");
+   if (!tab)
+      return 0;
+   if (typoAscent ) *typoAscent  = ttSHORT(info->data+tab + 68);
+   if (typoDescent) *typoDescent = ttSHORT(info->data+tab + 70);
+   if (typoLineGap) *typoLineGap = ttSHORT(info->data+tab + 72);
+   return 1;
+}
+
+STBTT_DEF void stbtt_GetFontBoundingBox(const stbtt_fontinfo *info, int *x0, int *y0, int *x1, int *y1)
+{
+   *x0 = ttSHORT(info->data + info->head + 36);
+   *y0 = ttSHORT(info->data + info->head + 38);
+   *x1 = ttSHORT(info->data + info->head + 40);
+   *y1 = ttSHORT(info->data + info->head + 42);
+}
+
+STBTT_DEF float stbtt_ScaleForPixelHeight(const stbtt_fontinfo *info, float height)
+{
+   int fheight = ttSHORT(info->data + info->hhea + 4) - ttSHORT(info->data + info->hhea + 6);
+   return (float) height / fheight;
+}
+
+STBTT_DEF float stbtt_ScaleForMappingEmToPixels(const stbtt_fontinfo *info, float pixels)
+{
+   int unitsPerEm = ttUSHORT(info->data + info->head + 18);
+   return pixels / unitsPerEm;
+}
+
+STBTT_DEF void stbtt_FreeShape(const stbtt_fontinfo *info, stbtt_vertex *v)
+{
+   STBTT_free(v, info->userdata);
+}
+
+STBTT_DEF stbtt_uint8 *stbtt_FindSVGDoc(const stbtt_fontinfo *info, int gl)
+{
+   int i;
+   stbtt_uint8 *data = info->data;
+   stbtt_uint8 *svg_doc_list = data + stbtt__get_svg((stbtt_fontinfo *) info);
+
+   int numEntries = ttUSHORT(svg_doc_list);
+   stbtt_uint8 *svg_docs = svg_doc_list + 2;
+
+   for(i=0; i<numEntries; i++) {
+      stbtt_uint8 *svg_doc = svg_docs + (12 * i);
+      if ((gl >= ttUSHORT(svg_doc)) && (gl <= ttUSHORT(svg_doc + 2)))
+         return svg_doc;
+   }
+   return 0;
+}
+
+STBTT_DEF int stbtt_GetGlyphSVG(const stbtt_fontinfo *info, int gl, const char **svg)
+{
+   stbtt_uint8 *data = info->data;
+   stbtt_uint8 *svg_doc;
+
+   if (info->svg == 0)
+      return 0;
+
+   svg_doc = stbtt_FindSVGDoc(info, gl);
+   if (svg_doc != NULL) {
+      *svg = (char *) data + info->svg + ttULONG(svg_doc + 4);
+      return ttULONG(svg_doc + 8);
+   } else {
+      return 0;
+   }
+}
+
+STBTT_DEF int stbtt_GetCodepointSVG(const stbtt_fontinfo *info, int unicode_codepoint, const char **svg)
+{
+   return stbtt_GetGlyphSVG(info, stbtt_FindGlyphIndex(info, unicode_codepoint), svg);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// antialiasing software rasterizer
+//
+
+STBTT_DEF void stbtt_GetGlyphBitmapBoxSubpixel(const stbtt_fontinfo *font, int glyph, float scale_x, float scale_y,float shift_x, float shift_y, int *ix0, int *iy0, int *ix1, int *iy1)
+{
+   int x0=0,y0=0,x1,y1; // =0 suppresses compiler warning
+   if (!stbtt_GetGlyphBox(font, glyph, &x0,&y0,&x1,&y1)) {
+      // e.g. space character
+      if (ix0) *ix0 = 0;
+      if (iy0) *iy0 = 0;
+      if (ix1) *ix1 = 0;
+      if (iy1) *iy1 = 0;
+   } else {
+      // move to integral bboxes (treating pixels as little squares, what pixels get touched)?
+      if (ix0) *ix0 = STBTT_ifloor( x0 * scale_x + shift_x);
+      if (iy0) *iy0 = STBTT_ifloor(-y1 * scale_y + shift_y);
+      if (ix1) *ix1 = STBTT_iceil ( x1 * scale_x + shift_x);
+      if (iy1) *iy1 = STBTT_iceil (-y0 * scale_y + shift_y);
+   }
+}
+
+STBTT_DEF void stbtt_GetGlyphBitmapBox(const stbtt_fontinfo *font, int glyph, float scale_x, float scale_y, int *ix0, int *iy0, int *ix1, int *iy1)
+{
+   stbtt_GetGlyphBitmapBoxSubpixel(font, glyph, scale_x, scale_y,0.0f,0.0f, ix0, iy0, ix1, iy1);
+}
+
+STBTT_DEF void stbtt_GetCodepointBitmapBoxSubpixel(const stbtt_fontinfo *font, int codepoint, float scale_x, float scale_y, float shift_x, float shift_y, int *ix0, int *iy0, int *ix1, int *iy1)
+{
+   stbtt_GetGlyphBitmapBoxSubpixel(font, stbtt_FindGlyphIndex(font,codepoint), scale_x, scale_y,shift_x,shift_y, ix0,iy0,ix1,iy1);
+}
+
+STBTT_DEF void stbtt_GetCodepointBitmapBox(const stbtt_fontinfo *font, int codepoint, float scale_x, float scale_y, int *ix0, int *iy0, int *ix1, int *iy1)
+{
+   stbtt_GetCodepointBitmapBoxSubpixel(font, codepoint, scale_x, scale_y,0.0f,0.0f, ix0,iy0,ix1,iy1);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//  Rasterizer
+
+typedef struct stbtt__hheap_chunk
+{
+   struct stbtt__hheap_chunk *next;
+} stbtt__hheap_chunk;
+
+typedef struct stbtt__hheap
+{
+   struct stbtt__hheap_chunk *head;
+   void   *first_free;
+   int    num_remaining_in_head_chunk;
+} stbtt__hheap;
+
+static void *stbtt__hheap_alloc(stbtt__hheap *hh, size_t size, void *userdata)
+{
+   if (hh->first_free) {
+      void *p = hh->first_free;
+      hh->first_free = * (void **) p;
+      return p;
+   } else {
+      if (hh->num_remaining_in_head_chunk == 0) {
+         int count = (size < 32 ? 2000 : size < 128 ? 800 : 100);
+         stbtt__hheap_chunk *c = (stbtt__hheap_chunk *) STBTT_malloc(sizeof(stbtt__hheap_chunk) + size * count, userdata);
+         if (c == NULL)
+            return NULL;
+         c->next = hh->head;
+         hh->head = c;
+         hh->num_remaining_in_head_chunk = count;
+      }
+      --hh->num_remaining_in_head_chunk;
+      return (char *) (hh->head) + sizeof(stbtt__hheap_chunk) + size * hh->num_remaining_in_head_chunk;
+   }
+}
+
+static void stbtt__hheap_free(stbtt__hheap *hh, void *p)
+{
+   *(void **) p = hh->first_free;
+   hh->first_free = p;
+}
+
+static void stbtt__hheap_cleanup(stbtt__hheap *hh, void *userdata)
+{
+   stbtt__hheap_chunk *c = hh->head;
+   while (c) {
+      stbtt__hheap_chunk *n = c->next;
+      STBTT_free(c, userdata);
+      c = n;
+   }
+}
+
+typedef struct stbtt__edge {
+   float x0,y0, x1,y1;
+   int invert;
+} stbtt__edge;
+
+
+typedef struct stbtt__active_edge
+{
+   struct stbtt__active_edge *next;
+   #if STBTT_RASTERIZER_VERSION==1
+   int x,dx;
+   float ey;
+   int direction;
+   #elif STBTT_RASTERIZER_VERSION==2
+   float fx,fdx,fdy;
+   float direction;
+   float sy;
+   float ey;
+   #else
+   #error "Unrecognized value of STBTT_RASTERIZER_VERSION"
+   #endif
+} stbtt__active_edge;
+
+#if STBTT_RASTERIZER_VERSION == 1
+#define STBTT_FIXSHIFT   10
+#define STBTT_FIX        (1 << STBTT_FIXSHIFT)
+#define STBTT_FIXMASK    (STBTT_FIX-1)
+
+static stbtt__active_edge *stbtt__new_active(stbtt__hheap *hh, stbtt__edge *e, int off_x, float start_point, void *userdata)
+{
+   stbtt__active_edge *z = (stbtt__active_edge *) stbtt__hheap_alloc(hh, sizeof(*z), userdata);
+   float dxdy = (e->x1 - e->x0) / (e->y1 - e->y0);
+   STBTT_assert(z != NULL);
+   if (!z) return z;
+
+   // round dx down to avoid overshooting
+   if (dxdy < 0)
+      z->dx = -STBTT_ifloor(STBTT_FIX * -dxdy);
+   else
+      z->dx = STBTT_ifloor(STBTT_FIX * dxdy);
+
+   z->x = STBTT_ifloor(STBTT_FIX * e->x0 + z->dx * (start_point - e->y0)); // use z->dx so when we offset later it's by the same amount
+   z->x -= off_x * STBTT_FIX;
+
+   z->ey = e->y1;
+   z->next = 0;
+   z->direction = e->invert ? 1 : -1;
+   return z;
+}
+#elif STBTT_RASTERIZER_VERSION == 2
+static stbtt__active_edge *stbtt__new_active(stbtt__hheap *hh, stbtt__edge *e, int off_x, float start_point, void *userdata)
+{
+   stbtt__active_edge *z = (stbtt__active_edge *) stbtt__hheap_alloc(hh, sizeof(*z), userdata);
+   float dxdy = (e->x1 - e->x0) / (e->y1 - e->y0);
+   STBTT_assert(z != NULL);
+   //STBTT_assert(e->y0 <= start_point);
+   if (!z) return z;
+   z->fdx = dxdy;
+   z->fdy = dxdy != 0.0f ? (1.0f/dxdy) : 0.0f;
+   z->fx = e->x0 + dxdy * (start_point - e->y0);
+   z->fx -= off_x;
+   z->direction = e->invert ? 1.0f : -1.0f;
+   z->sy = e->y0;
+   z->ey = e->y1;
+   z->next = 0;
+   return z;
+}
+#else
+#error "Unrecognized value of STBTT_RASTERIZER_VERSION"
+#endif
+
+#if STBTT_RASTERIZER_VERSION == 1
+// note: this routine clips fills that extend off the edges... ideally this
+// wouldn't happen, but it could happen if the truetype glyph bounding boxes
+// are wrong, or if the user supplies a too-small bitmap
+static void stbtt__fill_active_edges(unsigned char *scanline, int len, stbtt__active_edge *e, int max_weight)
+{
+   // non-zero winding fill
+   int x0=0, w=0;
+
+   while (e) {
+      if (w == 0) {
+         // if we're currently at zero, we need to record the edge start point
+         x0 = e->x; w += e->direction;
+      } else {
+         int x1 = e->x; w += e->direction;
+         // if we went to zero, we need to draw
+         if (w == 0) {
+            int i = x0 >> STBTT_FIXSHIFT;
+            int j = x1 >> STBTT_FIXSHIFT;
+
+            if (i < len && j >= 0) {
+               if (i == j) {
+                  // x0,x1 are the same pixel, so compute combined coverage
+                  scanline[i] = scanline[i] + (stbtt_uint8) ((x1 - x0) * max_weight >> STBTT_FIXSHIFT);
+               } else {
+                  if (i >= 0) // add antialiasing for x0
+                     scanline[i] = scanline[i] + (stbtt_uint8) (((STBTT_FIX - (x0 & STBTT_FIXMASK)) * max_weight) >> STBTT_FIXSHIFT);
+                  else
+                     i = -1; // clip
+
+                  if (j < len) // add antialiasing for x1
+                     scanline[j] = scanline[j] + (stbtt_uint8) (((x1 & STBTT_FIXMASK) * max_weight) >> STBTT_FIXSHIFT);
+                  else
+                     j = len; // clip
+
+                  for (++i; i < j; ++i) // fill pixels between x0 and x1
+                     scanline[i] = scanline[i] + (stbtt_uint8) max_weight;
+               }
+            }
+         }
+      }
+
+      e = e->next;
+   }
+}
+
+static void stbtt__rasterize_sorted_edges(stbtt__bitmap *result, stbtt__edge *e, int n, int vsubsample, int off_x, int off_y, void *userdata)
+{
+   stbtt__hheap hh = { 0, 0, 0 };
+   stbtt__active_edge *active = NULL;
+   int y,j=0;
+   int max_weight = (255 / vsubsample);  // weight per vertical scanline
+   int s; // vertical subsample index
+   unsigned char scanline_data[512], *scanline;
+
+   if (result->w > 512)
+      scanline = (unsigned char *) STBTT_malloc(result->w, userdata);
+   else
+      scanline = scanline_data;
+
+   y = off_y * vsubsample;
+   e[n].y0 = (off_y + result->h) * (float) vsubsample + 1;
+
+   while (j < result->h) {
+      STBTT_memset(scanline, 0, result->w);
+      for (s=0; s < vsubsample; ++s) {
+         // find center of pixel for this scanline
+         float scan_y = y + 0.5f;
+         stbtt__active_edge **step = &active;
+
+         // update all active edges;
+         // remove all active edges that terminate before the center of this scanline
+         while (*step) {
+            stbtt__active_edge * z = *step;
+            if (z->ey <= scan_y) {
+               *step = z->next; // delete from list
+               STBTT_assert(z->direction);
+               z->direction = 0;
+               stbtt__hheap_free(&hh, z);
+            } else {
+               z->x += z->dx; // advance to position for current scanline
+               step = &((*step)->next); // advance through list
+            }
+         }
+
+         // resort the list if needed
+         for(;;) {
+            int changed=0;
+            step = &active;
+            while (*step && (*step)->next) {
+               if ((*step)->x > (*step)->next->x) {
+                  stbtt__active_edge *t = *step;
+                  stbtt__active_edge *q = t->next;
+
+                  t->next = q->next;
+                  q->next = t;
+                  *step = q;
+                  changed = 1;
+               }
+               step = &(*step)->next;
+            }
+            if (!changed) break;
+         }
+
+         // insert all edges that start before the center of this scanline -- omit ones that also end on this scanline
+         while (e->y0 <= scan_y) {
+            if (e->y1 > scan_y) {
+               stbtt__active_edge *z = stbtt__new_active(&hh, e, off_x, scan_y, userdata);
+               if (z != NULL) {
+                  // find insertion point
+                  if (active == NULL)
+                     active = z;
+                  else if (z->x < active->x) {
+                     // insert at front
+                     z->next = active;
+                     active = z;
+                  } else {
+                     // find thing to insert AFTER
+                     stbtt__active_edge *p = active;
+                     while (p->next && p->next->x < z->x)
+                        p = p->next;
+                     // at this point, p->next->x is NOT < z->x
+                     z->next = p->next;
+                     p->next = z;
+                  }
+               }
+            }
+            ++e;
+         }
+
+         // now process all active edges in XOR fashion
+         if (active)
+            stbtt__fill_active_edges(scanline, result->w, active, max_weight);
+
+         ++y;
+      }
+      STBTT_memcpy(result->pixels + j * result->stride, scanline, result->w);
+      ++j;
+   }
+
+   stbtt__hheap_cleanup(&hh, userdata);
+
+   if (scanline != scanline_data)
+      STBTT_free(scanline, userdata);
+}
+
+#elif STBTT_RASTERIZER_VERSION == 2
+
+// the edge passed in here does not cross the vertical line at x or the vertical line at x+1
+// (i.e. it has already been clipped to those)
+static void stbtt__handle_clipped_edge(float *scanline, int x, stbtt__active_edge *e, float x0, float y0, float x1, float y1)
+{
+   if (y0 == y1) return;
+   STBTT_assert(y0 < y1);
+   STBTT_assert(e->sy <= e->ey);
+   if (y0 > e->ey) return;
+   if (y1 < e->sy) return;
+   if (y0 < e->sy) {
+      x0 += (x1-x0) * (e->sy - y0) / (y1-y0);
+      y0 = e->sy;
+   }
+   if (y1 > e->ey) {
+      x1 += (x1-x0) * (e->ey - y1) / (y1-y0);
+      y1 = e->ey;
+   }
+
+   if (x0 == x)
+      STBTT_assert(x1 <= x+1);
+   else if (x0 == x+1)
+      STBTT_assert(x1 >= x);
+   else if (x0 <= x)
+      STBTT_assert(x1 <= x);
+   else if (x0 >= x+1)
+      STBTT_assert(x1 >= x+1);
+   else
+      STBTT_assert(x1 >= x && x1 <= x+1);
+
+   if (x0 <= x && x1 <= x)
+      scanline[x] += e->direction * (y1-y0);
+   else if (x0 >= x+1 && x1 >= x+1)
+      ;
+   else {
+      STBTT_assert(x0 >= x && x0 <= x+1 && x1 >= x && x1 <= x+1);
+      scanline[x] += e->direction * (y1-y0) * (1-((x0-x)+(x1-x))/2); // coverage = 1 - average x position
+   }
+}
+
+static float stbtt__sized_trapezoid_area(float height, float top_width, float bottom_width)
+{
+   STBTT_assert(top_width >= 0);
+   STBTT_assert(bottom_width >= 0);
+   return (top_width + bottom_width) / 2.0f * height;
+}
+
+static float stbtt__position_trapezoid_area(float height, float tx0, float tx1, float bx0, float bx1)
+{
+   return stbtt__sized_trapezoid_area(height, tx1 - tx0, bx1 - bx0);
+}
+
+static float stbtt__sized_triangle_area(float height, float width)
+{
+   return height * width / 2;
+}
+
+static void stbtt__fill_active_edges_new(float *scanline, float *scanline_fill, int len, stbtt__active_edge *e, float y_top)
+{
+   float y_bottom = y_top+1;
+
+   while (e) {
+      // brute force every pixel
+
+      // compute intersection points with top & bottom
+      STBTT_assert(e->ey >= y_top);
+
+      if (e->fdx == 0) {
+         float x0 = e->fx;
+         if (x0 < len) {
+            if (x0 >= 0) {
+               stbtt__handle_clipped_edge(scanline,(int) x0,e, x0,y_top, x0,y_bottom);
+               stbtt__handle_clipped_edge(scanline_fill-1,(int) x0+1,e, x0,y_top, x0,y_bottom);
+            } else {
+               stbtt__handle_clipped_edge(scanline_fill-1,0,e, x0,y_top, x0,y_bottom);
+            }
+         }
+      } else {
+         float x0 = e->fx;
+         float dx = e->fdx;
+         float xb = x0 + dx;
+         float x_top, x_bottom;
+         float sy0,sy1;
+         float dy = e->fdy;
+         STBTT_assert(e->sy <= y_bottom && e->ey >= y_top);
+
+         // compute endpoints of line segment clipped to this scanline (if the
+         // line segment starts on this scanline. x0 is the intersection of the
+         // line with y_top, but that may be off the line segment.
+         if (e->sy > y_top) {
+            x_top = x0 + dx * (e->sy - y_top);
+            sy0 = e->sy;
+         } else {
+            x_top = x0;
+            sy0 = y_top;
+         }
+         if (e->ey < y_bottom) {
+            x_bottom = x0 + dx * (e->ey - y_top);
+            sy1 = e->ey;
+         } else {
+            x_bottom = xb;
+            sy1 = y_bottom;
+         }
+
+         if (x_top >= 0 && x_bottom >= 0 && x_top < len && x_bottom < len) {
+            // from here on, we don't have to range check x values
+
+            if ((int) x_top == (int) x_bottom) {
+               float height;
+               // simple case, only spans one pixel
+               int x = (int) x_top;
+               height = (sy1 - sy0) * e->direction;
+               STBTT_assert(x >= 0 && x < len);
+               scanline[x]      += stbtt__position_trapezoid_area(height, x_top, x+1.0f, x_bottom, x+1.0f);
+               scanline_fill[x] += height; // everything right of this pixel is filled
+            } else {
+               int x,x1,x2;
+               float y_crossing, y_final, step, sign, area;
+               // covers 2+ pixels
+               if (x_top > x_bottom) {
+                  // flip scanline vertically; signed area is the same
+                  float t;
+                  sy0 = y_bottom - (sy0 - y_top);
+                  sy1 = y_bottom - (sy1 - y_top);
+                  t = sy0, sy0 = sy1, sy1 = t;
+                  t = x_bottom, x_bottom = x_top, x_top = t;
+                  dx = -dx;
+                  dy = -dy;
+                  t = x0, x0 = xb, xb = t;
+               }
+               STBTT_assert(dy >= 0);
+               STBTT_assert(dx >= 0);
+
+               x1 = (int) x_top;
+               x2 = (int) x_bottom;
+               // compute intersection with y axis at x1+1
+               y_crossing = y_top + dy * (x1+1 - x0);
+
+               // compute intersection with y axis at x2
+               y_final = y_top + dy * (x2 - x0);
+
+               //           x1    x_top                            x2    x_bottom
+               //     y_top  +------|-----+------------+------------+--------|---+------------+
+               //            |            |            |            |            |            |
+               //            |            |            |            |            |            |
+               //       sy0  |      Txxxxx|............|............|............|............|
+               // y_crossing |            *xxxxx.......|............|............|............|
+               //            |            |     xxxxx..|............|............|............|
+               //            |            |     /-   xx*xxxx........|............|............|
+               //            |            | dy <       |    xxxxxx..|............|............|
+               //   y_final  |            |     \-     |          xx*xxx.........|............|
+               //       sy1  |            |            |            |   xxxxxB...|............|
+               //            |            |            |            |            |            |
+               //            |            |            |            |            |            |
+               //  y_bottom  +------------+------------+------------+------------+------------+
+               //
+               // goal is to measure the area covered by '.' in each pixel
+
+               // if x2 is right at the right edge of x1, y_crossing can blow up, github #1057
+               // @TODO: maybe test against sy1 rather than y_bottom?
+               if (y_crossing > y_bottom)
+                  y_crossing = y_bottom;
+
+               sign = e->direction;
+
+               // area of the rectangle covered from sy0..y_crossing
+               area = sign * (y_crossing-sy0);
+
+               // area of the triangle (x_top,sy0), (x1+1,sy0), (x1+1,y_crossing)
+               scanline[x1] += stbtt__sized_triangle_area(area, x1+1 - x_top);
+
+               // check if final y_crossing is blown up; no test case for this
+               if (y_final > y_bottom) {
+                  y_final = y_bottom;
+                  dy = (y_final - y_crossing ) / (x2 - (x1+1)); // if denom=0, y_final = y_crossing, so y_final <= y_bottom
+               }
+
+               // in second pixel, area covered by line segment found in first pixel
+               // is always a rectangle 1 wide * the height of that line segment; this
+               // is exactly what the variable 'area' stores. it also gets a contribution
+               // from the line segment within it. the THIRD pixel will get the first
+               // pixel's rectangle contribution, the second pixel's rectangle contribution,
+               // and its own contribution. the 'own contribution' is the same in every pixel except
+               // the leftmost and rightmost, a trapezoid that slides down in each pixel.
+               // the second pixel's contribution to the third pixel will be the
+               // rectangle 1 wide times the height change in the second pixel, which is dy.
+
+               step = sign * dy * 1; // dy is dy/dx, change in y for every 1 change in x,
+               // which multiplied by 1-pixel-width is how much pixel area changes for each step in x
+               // so the area advances by 'step' every time
+
+               for (x = x1+1; x < x2; ++x) {
+                  scanline[x] += area + step/2; // area of trapezoid is 1*step/2
+                  area += step;
+               }
+               STBTT_assert(STBTT_fabs(area) <= 1.01f); // accumulated error from area += step unless we round step down
+               STBTT_assert(sy1 > y_final-0.01f);
+
+               // area covered in the last pixel is the rectangle from all the pixels to the left,
+               // plus the trapezoid filled by the line segment in this pixel all the way to the right edge
+               scanline[x2] += area + sign * stbtt__position_trapezoid_area(sy1-y_final, (float) x2, x2+1.0f, x_bottom, x2+1.0f);
+
+               // the rest of the line is filled based on the total height of the line segment in this pixel
+               scanline_fill[x2] += sign * (sy1-sy0);
+            }
+         } else {
+            // if edge goes outside of box we're drawing, we require
+            // clipping logic. since this does not match the intended use
+            // of this library, we use a different, very slow brute
+            // force implementation
+            // note though that this does happen some of the time because
+            // x_top and x_bottom can be extrapolated at the top & bottom of
+            // the shape and actually lie outside the bounding box
+            int x;
+            for (x=0; x < len; ++x) {
+               // cases:
+               //
+               // there can be up to two intersections with the pixel. any intersection
+               // with left or right edges can be handled by splitting into two (or three)
+               // regions. intersections with top & bottom do not necessitate case-wise logic.
+               //
+               // the old way of doing this found the intersections with the left & right edges,
+               // then used some simple logic to produce up to three segments in sorted order
+               // from top-to-bottom. however, this had a problem: if an x edge was epsilon
+               // across the x border, then the corresponding y position might not be distinct
+               // from the other y segment, and it might ignored as an empty segment. to avoid
+               // that, we need to explicitly produce segments based on x positions.
+
+               // rename variables to clearly-defined pairs
+               float y0 = y_top;
+               float x1 = (float) (x);
+               float x2 = (float) (x+1);
+               float x3 = xb;
+               float y3 = y_bottom;
+
+               // x = e->x + e->dx * (y-y_top)
+               // (y-y_top) = (x - e->x) / e->dx
+               // y = (x - e->x) / e->dx + y_top
+               float y1 = (x - x0) / dx + y_top;
+               float y2 = (x+1 - x0) / dx + y_top;
+
+               if (x0 < x1 && x3 > x2) {         // three segments descending down-right
+                  stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x1,y1);
+                  stbtt__handle_clipped_edge(scanline,x,e, x1,y1, x2,y2);
+                  stbtt__handle_clipped_edge(scanline,x,e, x2,y2, x3,y3);
+               } else if (x3 < x1 && x0 > x2) {  // three segments descending down-left
+                  stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x2,y2);
+                  stbtt__handle_clipped_edge(scanline,x,e, x2,y2, x1,y1);
+                  stbtt__handle_clipped_edge(scanline,x,e, x1,y1, x3,y3);
+               } else if (x0 < x1 && x3 > x1) {  // two segments across x, down-right
+                  stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x1,y1);
+                  stbtt__handle_clipped_edge(scanline,x,e, x1,y1, x3,y3);
+               } else if (x3 < x1 && x0 > x1) {  // two segments across x, down-left
+                  stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x1,y1);
+                  stbtt__handle_clipped_edge(scanline,x,e, x1,y1, x3,y3);
+               } else if (x0 < x2 && x3 > x2) {  // two segments across x+1, down-right
+                  stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x2,y2);
+                  stbtt__handle_clipped_edge(scanline,x,e, x2,y2, x3,y3);
+               } else if (x3 < x2 && x0 > x2) {  // two segments across x+1, down-left
+                  stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x2,y2);
+                  stbtt__handle_clipped_edge(scanline,x,e, x2,y2, x3,y3);
+               } else {  // one segment
+                  stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x3,y3);
+               }
+            }
+         }
+      }
+      e = e->next;
+   }
+}
+
+// directly AA rasterize edges w/o supersampling
+static void stbtt__rasterize_sorted_edges(stbtt__bitmap *result, stbtt__edge *e, int n, int vsubsample, int off_x, int off_y, void *userdata)
+{
+   stbtt__hheap hh = { 0, 0, 0 };
+   stbtt__active_edge *active = NULL;
+   int y,j=0, i;
+   float scanline_data[129], *scanline, *scanline2;
+
+   STBTT__NOTUSED(vsubsample);
+
+   if (result->w > 64)
+      scanline = (float *) STBTT_malloc((result->w*2+1) * sizeof(float), userdata);
+   else
+      scanline = scanline_data;
+
+   scanline2 = scanline + result->w;
+
+   y = off_y;
+   e[n].y0 = (float) (off_y + result->h) + 1;
+
+   while (j < result->h) {
+      // find center of pixel for this scanline
+      float scan_y_top    = y + 0.0f;
+      float scan_y_bottom = y + 1.0f;
+      stbtt__active_edge **step = &active;
+
+      STBTT_memset(scanline , 0, result->w*sizeof(scanline[0]));
+      STBTT_memset(scanline2, 0, (result->w+1)*sizeof(scanline[0]));
+
+      // update all active edges;
+      // remove all active edges that terminate before the top of this scanline
+      while (*step) {
+         stbtt__active_edge * z = *step;
+         if (z->ey <= scan_y_top) {
+            *step = z->next; // delete from list
+            STBTT_assert(z->direction);
+            z->direction = 0;
+            stbtt__hheap_free(&hh, z);
+         } else {
+            step = &((*step)->next); // advance through list
+         }
+      }
+
+      // insert all edges that start before the bottom of this scanline
+      while (e->y0 <= scan_y_bottom) {
+         if (e->y0 != e->y1) {
+            stbtt__active_edge *z = stbtt__new_active(&hh, e, off_x, scan_y_top, userdata);
+            if (z != NULL) {
+               if (j == 0 && off_y != 0) {
+                  if (z->ey < scan_y_top) {
+                     // this can happen due to subpixel positioning and some kind of fp rounding error i think
+                     z->ey = scan_y_top;
+                  }
+               }
+               STBTT_assert(z->ey >= scan_y_top); // if we get really unlucky a tiny bit of an edge can be out of bounds
+               // insert at front
+               z->next = active;
+               active = z;
+            }
+         }
+         ++e;
+      }
+
+      // now process all active edges
+      if (active)
+         stbtt__fill_active_edges_new(scanline, scanline2+1, result->w, active, scan_y_top);
+
+      {
+         float sum = 0;
+         for (i=0; i < result->w; ++i) {
+            float k;
+            int m;
+            sum += scanline2[i];
+            k = scanline[i] + sum;
+            k = (float) STBTT_fabs(k)*255 + 0.5f;
+            m = (int) k;
+            if (m > 255) m = 255;
+            result->pixels[j*result->stride + i] = (unsigned char) m;
+         }
+      }
+      // advance all the edges
+      step = &active;
+      while (*step) {
+         stbtt__active_edge *z = *step;
+         z->fx += z->fdx; // advance to position for current scanline
+         step = &((*step)->next); // advance through list
+      }
+
+      ++y;
+      ++j;
+   }
+
+   stbtt__hheap_cleanup(&hh, userdata);
+
+   if (scanline != scanline_data)
+      STBTT_free(scanline, userdata);
+}
+#else
+#error "Unrecognized value of STBTT_RASTERIZER_VERSION"
+#endif
+
+#define STBTT__COMPARE(a,b)  ((a)->y0 < (b)->y0)
+
+static void stbtt__sort_edges_ins_sort(stbtt__edge *p, int n)
+{
+   int i,j;
+   for (i=1; i < n; ++i) {
+      stbtt__edge t = p[i], *a = &t;
+      j = i;
+      while (j > 0) {
+         stbtt__edge *b = &p[j-1];
+         int c = STBTT__COMPARE(a,b);
+         if (!c) break;
+         p[j] = p[j-1];
+         --j;
+      }
+      if (i != j)
+         p[j] = t;
+   }
+}
+
+static void stbtt__sort_edges_quicksort(stbtt__edge *p, int n)
+{
+   /* threshold for transitioning to insertion sort */
+   while (n > 12) {
+      stbtt__edge t;
+      int c01,c12,c,m,i,j;
+
+      /* compute median of three */
+      m = n >> 1;
+      c01 = STBTT__COMPARE(&p[0],&p[m]);
+      c12 = STBTT__COMPARE(&p[m],&p[n-1]);
+      /* if 0 >= mid >= end, or 0 < mid < end, then use mid */
+      if (c01 != c12) {
+         /* otherwise, we'll need to swap something else to middle */
+         int z;
+         c = STBTT__COMPARE(&p[0],&p[n-1]);
+         /* 0>mid && mid<n:  0>n => n; 0<n => 0 */
+         /* 0<mid && mid>n:  0>n => 0; 0<n => n */
+         z = (c == c12) ? 0 : n-1;
+         t = p[z];
+         p[z] = p[m];
+         p[m] = t;
+      }
+      /* now p[m] is the median-of-three */
+      /* swap it to the beginning so it won't move around */
+      t = p[0];
+      p[0] = p[m];
+      p[m] = t;
+
+      /* partition loop */
+      i=1;
+      j=n-1;
+      for(;;) {
+         /* handling of equality is crucial here */
+         /* for sentinels & efficiency with duplicates */
+         for (;;++i) {
+            if (!STBTT__COMPARE(&p[i], &p[0])) break;
+         }
+         for (;;--j) {
+            if (!STBTT__COMPARE(&p[0], &p[j])) break;
+         }
+         /* make sure we haven't crossed */
+         if (i >= j) break;
+         t = p[i];
+         p[i] = p[j];
+         p[j] = t;
+
+         ++i;
+         --j;
+      }
+      /* recurse on smaller side, iterate on larger */
+      if (j < (n-i)) {
+         stbtt__sort_edges_quicksort(p,j);
+         p = p+i;
+         n = n-i;
+      } else {
+         stbtt__sort_edges_quicksort(p+i, n-i);
+         n = j;
+      }
+   }
+}
+
+static void stbtt__sort_edges(stbtt__edge *p, int n)
+{
+   stbtt__sort_edges_quicksort(p, n);
+   stbtt__sort_edges_ins_sort(p, n);
+}
+
+typedef struct
+{
+   float x,y;
+} stbtt__point;
+
+static void stbtt__rasterize(stbtt__bitmap *result, stbtt__point *pts, int *wcount, int windings, float scale_x, float scale_y, float shift_x, float shift_y, int off_x, int off_y, int invert, void *userdata)
+{
+   float y_scale_inv = invert ? -scale_y : scale_y;
+   stbtt__edge *e;
+   int n,i,j,k,m;
+#if STBTT_RASTERIZER_VERSION == 1
+   int vsubsample = result->h < 8 ? 15 : 5;
+#elif STBTT_RASTERIZER_VERSION == 2
+   int vsubsample = 1;
+#else
+   #error "Unrecognized value of STBTT_RASTERIZER_VERSION"
+#endif
+   // vsubsample should divide 255 evenly; otherwise we won't reach full opacity
+
+   // now we have to blow out the windings into explicit edge lists
+   n = 0;
+   for (i=0; i < windings; ++i)
+      n += wcount[i];
+
+   e = (stbtt__edge *) STBTT_malloc(sizeof(*e) * (n+1), userdata); // add an extra one as a sentinel
+   if (e == 0) return;
+   n = 0;
+
+   m=0;
+   for (i=0; i < windings; ++i) {
+      stbtt__point *p = pts + m;
+      m += wcount[i];
+      j = wcount[i]-1;
+      for (k=0; k < wcount[i]; j=k++) {
+         int a=k,b=j;
+         // skip the edge if horizontal
+         if (p[j].y == p[k].y)
+            continue;
+         // add edge from j to k to the list
+         e[n].invert = 0;
+         if (invert ? p[j].y > p[k].y : p[j].y < p[k].y) {
+            e[n].invert = 1;
+            a=j,b=k;
+         }
+         e[n].x0 = p[a].x * scale_x + shift_x;
+         e[n].y0 = (p[a].y * y_scale_inv + shift_y) * vsubsample;
+         e[n].x1 = p[b].x * scale_x + shift_x;
+         e[n].y1 = (p[b].y * y_scale_inv + shift_y) * vsubsample;
+         ++n;
+      }
+   }
+
+   // now sort the edges by their highest point (should snap to integer, and then by x)
+   //STBTT_sort(e, n, sizeof(e[0]), stbtt__edge_compare);
+   stbtt__sort_edges(e, n);
+
+   // now, traverse the scanlines and find the intersections on each scanline, use xor winding rule
+   stbtt__rasterize_sorted_edges(result, e, n, vsubsample, off_x, off_y, userdata);
+
+   STBTT_free(e, userdata);
+}
+
+static void stbtt__add_point(stbtt__point *points, int n, float x, float y)
+{
+   if (!points) return; // during first pass, it's unallocated
+   points[n].x = x;
+   points[n].y = y;
+}
+
+// tessellate until threshold p is happy... @TODO warped to compensate for non-linear stretching
+static int stbtt__tesselate_curve(stbtt__point *points, int *num_points, float x0, float y0, float x1, float y1, float x2, float y2, float objspace_flatness_squared, int n)
+{
+   // midpoint
+   float mx = (x0 + 2*x1 + x2)/4;
+   float my = (y0 + 2*y1 + y2)/4;
+   // versus directly drawn line
+   float dx = (x0+x2)/2 - mx;
+   float dy = (y0+y2)/2 - my;
+   if (n > 16) // 65536 segments on one curve better be enough!
+      return 1;
+   if (dx*dx+dy*dy > objspace_flatness_squared) { // half-pixel error allowed... need to be smaller if AA
+      stbtt__tesselate_curve(points, num_points, x0,y0, (x0+x1)/2.0f,(y0+y1)/2.0f, mx,my, objspace_flatness_squared,n+1);
+      stbtt__tesselate_curve(points, num_points, mx,my, (x1+x2)/2.0f,(y1+y2)/2.0f, x2,y2, objspace_flatness_squared,n+1);
+   } else {
+      stbtt__add_point(points, *num_points,x2,y2);
+      *num_points = *num_points+1;
+   }
+   return 1;
+}
+
+static void stbtt__tesselate_cubic(stbtt__point *points, int *num_points, float x0, float y0, float x1, float y1, float x2, float y2, float x3, float y3, float objspace_flatness_squared, int n)
+{
+   // @TODO this "flatness" calculation is just made-up nonsense that seems to work well enough
+   float dx0 = x1-x0;
+   float dy0 = y1-y0;
+   float dx1 = x2-x1;
+   float dy1 = y2-y1;
+   float dx2 = x3-x2;
+   float dy2 = y3-y2;
+   float dx = x3-x0;
+   float dy = y3-y0;
+   float longlen = (float) (STBTT_sqrt(dx0*dx0+dy0*dy0)+STBTT_sqrt(dx1*dx1+dy1*dy1)+STBTT_sqrt(dx2*dx2+dy2*dy2));
+   float shortlen = (float) STBTT_sqrt(dx*dx+dy*dy);
+   float flatness_squared = longlen*longlen-shortlen*shortlen;
+
+   if (n > 16) // 65536 segments on one curve better be enough!
+      return;
+
+   if (flatness_squared > objspace_flatness_squared) {
+      float x01 = (x0+x1)/2;
+      float y01 = (y0+y1)/2;
+      float x12 = (x1+x2)/2;
+      float y12 = (y1+y2)/2;
+      float x23 = (x2+x3)/2;
+      float y23 = (y2+y3)/2;
+
+      float xa = (x01+x12)/2;
+      float ya = (y01+y12)/2;
+      float xb = (x12+x23)/2;
+      float yb = (y12+y23)/2;
+
+      float mx = (xa+xb)/2;
+      float my = (ya+yb)/2;
+
+      stbtt__tesselate_cubic(points, num_points, x0,y0, x01,y01, xa,ya, mx,my, objspace_flatness_squared,n+1);
+      stbtt__tesselate_cubic(points, num_points, mx,my, xb,yb, x23,y23, x3,y3, objspace_flatness_squared,n+1);
+   } else {
+      stbtt__add_point(points, *num_points,x3,y3);
+      *num_points = *num_points+1;
+   }
+}
+
+// returns number of contours
+static stbtt__point *stbtt_FlattenCurves(stbtt_vertex *vertices, int num_verts, float objspace_flatness, int **contour_lengths, int *num_contours, void *userdata)
+{
+   stbtt__point *points=0;
+   int num_points=0;
+
+   float objspace_flatness_squared = objspace_flatness * objspace_flatness;
+   int i,n=0,start=0, pass;
+
+   // count how many "moves" there are to get the contour count
+   for (i=0; i < num_verts; ++i)
+      if (vertices[i].type == STBTT_vmove)
+         ++n;
+
+   *num_contours = n;
+   if (n == 0) return 0;
+
+   *contour_lengths = (int *) STBTT_malloc(sizeof(**contour_lengths) * n, userdata);
+
+   if (*contour_lengths == 0) {
+      *num_contours = 0;
+      return 0;
+   }
+
+   // make two passes through the points so we don't need to realloc
+   for (pass=0; pass < 2; ++pass) {
+      float x=0,y=0;
+      if (pass == 1) {
+         points = (stbtt__point *) STBTT_malloc(num_points * sizeof(points[0]), userdata);
+         if (points == NULL) goto error;
+      }
+      num_points = 0;
+      n= -1;
+      for (i=0; i < num_verts; ++i) {
+         switch (vertices[i].type) {
+            case STBTT_vmove:
+               // start the next contour
+               if (n >= 0)
+                  (*contour_lengths)[n] = num_points - start;
+               ++n;
+               start = num_points;
+
+               x = vertices[i].x, y = vertices[i].y;
+               stbtt__add_point(points, num_points++, x,y);
+               break;
+            case STBTT_vline:
+               x = vertices[i].x, y = vertices[i].y;
+               stbtt__add_point(points, num_points++, x, y);
+               break;
+            case STBTT_vcurve:
+               stbtt__tesselate_curve(points, &num_points, x,y,
+                                        vertices[i].cx, vertices[i].cy,
+                                        vertices[i].x,  vertices[i].y,
+                                        objspace_flatness_squared, 0);
+               x = vertices[i].x, y = vertices[i].y;
+               break;
+            case STBTT_vcubic:
+               stbtt__tesselate_cubic(points, &num_points, x,y,
+                                        vertices[i].cx, vertices[i].cy,
+                                        vertices[i].cx1, vertices[i].cy1,
+                                        vertices[i].x,  vertices[i].y,
+                                        objspace_flatness_squared, 0);
+               x = vertices[i].x, y = vertices[i].y;
+               break;
+         }
+      }
+      (*contour_lengths)[n] = num_points - start;
+   }
+
+   return points;
+error:
+   STBTT_free(points, userdata);
+   STBTT_free(*contour_lengths, userdata);
+   *contour_lengths = 0;
+   *num_contours = 0;
+   return NULL;
+}
+
+STBTT_DEF void stbtt_Rasterize(stbtt__bitmap *result, float flatness_in_pixels, stbtt_vertex *vertices, int num_verts, float scale_x, float scale_y, float shift_x, float shift_y, int x_off, int y_off, int invert, void *userdata)
+{
+   float scale            = scale_x > scale_y ? scale_y : scale_x;
+   int winding_count      = 0;
+   int *winding_lengths   = NULL;
+   stbtt__point *windings = stbtt_FlattenCurves(vertices, num_verts, flatness_in_pixels / scale, &winding_lengths, &winding_count, userdata);
+   if (windings) {
+      stbtt__rasterize(result, windings, winding_lengths, winding_count, scale_x, scale_y, shift_x, shift_y, x_off, y_off, invert, userdata);
+      STBTT_free(winding_lengths, userdata);
+      STBTT_free(windings, userdata);
+   }
+}
+
+STBTT_DEF void stbtt_FreeBitmap(unsigned char *bitmap, void *userdata)
+{
+   STBTT_free(bitmap, userdata);
+}
+
+STBTT_DEF unsigned char *stbtt_GetGlyphBitmapSubpixel(const stbtt_fontinfo *info, float scale_x, float scale_y, float shift_x, float shift_y, int glyph, int *width, int *height, int *xoff, int *yoff)
+{
+   int ix0,iy0,ix1,iy1;
+   stbtt__bitmap gbm;
+   stbtt_vertex *vertices;
+   int num_verts = stbtt_GetGlyphShape(info, glyph, &vertices);
+
+   if (scale_x == 0) scale_x = scale_y;
+   if (scale_y == 0) {
+      if (scale_x == 0) {
+         STBTT_free(vertices, info->userdata);
+         return NULL;
+      }
+      scale_y = scale_x;
+   }
+
+   stbtt_GetGlyphBitmapBoxSubpixel(info, glyph, scale_x, scale_y, shift_x, shift_y, &ix0,&iy0,&ix1,&iy1);
+
+   // now we get the size
+   gbm.w = (ix1 - ix0);
+   gbm.h = (iy1 - iy0);
+   gbm.pixels = NULL; // in case we error
+
+   if (width ) *width  = gbm.w;
+   if (height) *height = gbm.h;
+   if (xoff  ) *xoff   = ix0;
+   if (yoff  ) *yoff   = iy0;
+
+   if (gbm.w && gbm.h) {
+      gbm.pixels = (unsigned char *) STBTT_malloc(gbm.w * gbm.h, info->userdata);
+      if (gbm.pixels) {
+         gbm.stride = gbm.w;
+
+         stbtt_Rasterize(&gbm, 0.35f, vertices, num_verts, scale_x, scale_y, shift_x, shift_y, ix0, iy0, 1, info->userdata);
+      }
+   }
+   STBTT_free(vertices, info->userdata);
+   return gbm.pixels;
+}
+
+STBTT_DEF unsigned char *stbtt_GetGlyphBitmap(const stbtt_fontinfo *info, float scale_x, float scale_y, int glyph, int *width, int *height, int *xoff, int *yoff)
+{
+   return stbtt_GetGlyphBitmapSubpixel(info, scale_x, scale_y, 0.0f, 0.0f, glyph, width, height, xoff, yoff);
+}
+
+STBTT_DEF void stbtt_MakeGlyphBitmapSubpixel(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, float shift_x, float shift_y, int glyph)
+{
+   int ix0,iy0;
+   stbtt_vertex *vertices;
+   int num_verts = stbtt_GetGlyphShape(info, glyph, &vertices);
+   stbtt__bitmap gbm;
+
+   stbtt_GetGlyphBitmapBoxSubpixel(info, glyph, scale_x, scale_y, shift_x, shift_y, &ix0,&iy0,0,0);
+   gbm.pixels = output;
+   gbm.w = out_w;
+   gbm.h = out_h;
+   gbm.stride = out_stride;
+
+   if (gbm.w && gbm.h)
+      stbtt_Rasterize(&gbm, 0.35f, vertices, num_verts, scale_x, scale_y, shift_x, shift_y, ix0,iy0, 1, info->userdata);
+
+   STBTT_free(vertices, info->userdata);
+}
+
+STBTT_DEF void stbtt_MakeGlyphBitmap(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, int glyph)
+{
+   stbtt_MakeGlyphBitmapSubpixel(info, output, out_w, out_h, out_stride, scale_x, scale_y, 0.0f,0.0f, glyph);
+}
+
+STBTT_DEF unsigned char *stbtt_GetCodepointBitmapSubpixel(const stbtt_fontinfo *info, float scale_x, float scale_y, float shift_x, float shift_y, int codepoint, int *width, int *height, int *xoff, int *yoff)
+{
+   return stbtt_GetGlyphBitmapSubpixel(info, scale_x, scale_y,shift_x,shift_y, stbtt_FindGlyphIndex(info,codepoint), width,height,xoff,yoff);
+}
+
+STBTT_DEF void stbtt_MakeCodepointBitmapSubpixelPrefilter(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, float shift_x, float shift_y, int oversample_x, int oversample_y, float *sub_x, float *sub_y, int codepoint)
+{
+   stbtt_MakeGlyphBitmapSubpixelPrefilter(info, output, out_w, out_h, out_stride, scale_x, scale_y, shift_x, shift_y, oversample_x, oversample_y, sub_x, sub_y, stbtt_FindGlyphIndex(info,codepoint));
+}
+
+STBTT_DEF void stbtt_MakeCodepointBitmapSubpixel(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, float shift_x, float shift_y, int codepoint)
+{
+   stbtt_MakeGlyphBitmapSubpixel(info, output, out_w, out_h, out_stride, scale_x, scale_y, shift_x, shift_y, stbtt_FindGlyphIndex(info,codepoint));
+}
+
+STBTT_DEF unsigned char *stbtt_GetCodepointBitmap(const stbtt_fontinfo *info, float scale_x, float scale_y, int codepoint, int *width, int *height, int *xoff, int *yoff)
+{
+   return stbtt_GetCodepointBitmapSubpixel(info, scale_x, scale_y, 0.0f,0.0f, codepoint, width,height,xoff,yoff);
+}
+
+STBTT_DEF void stbtt_MakeCodepointBitmap(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, int codepoint)
+{
+   stbtt_MakeCodepointBitmapSubpixel(info, output, out_w, out_h, out_stride, scale_x, scale_y, 0.0f,0.0f, codepoint);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// bitmap baking
+//
+// This is SUPER-CRAPPY packing to keep source code small
+
+static int stbtt_BakeFontBitmap_internal(unsigned char *data, int offset,  // font location (use offset=0 for plain .ttf)
+                                float pixel_height,                     // height of font in pixels
+                                unsigned char *pixels, int pw, int ph,  // bitmap to be filled in
+                                int first_char, int num_chars,          // characters to bake
+                                stbtt_bakedchar *chardata)
+{
+   float scale;
+   int x,y,bottom_y, i;
+   stbtt_fontinfo f;
+   f.userdata = NULL;
+   if (!stbtt_InitFont(&f, data, offset))
+      return -1;
+   STBTT_memset(pixels, 0, pw*ph); // background of 0 around pixels
+   x=y=1;
+   bottom_y = 1;
+
+   scale = stbtt_ScaleForPixelHeight(&f, pixel_height);
+
+   for (i=0; i < num_chars; ++i) {
+      int advance, lsb, x0,y0,x1,y1,gw,gh;
+      int g = stbtt_FindGlyphIndex(&f, first_char + i);
+      stbtt_GetGlyphHMetrics(&f, g, &advance, &lsb);
+      stbtt_GetGlyphBitmapBox(&f, g, scale,scale, &x0,&y0,&x1,&y1);
+      gw = x1-x0;
+      gh = y1-y0;
+      if (x + gw + 1 >= pw)
+         y = bottom_y, x = 1; // advance to next row
+      if (y + gh + 1 >= ph) // check if it fits vertically AFTER potentially moving to next row
+         return -i;
+      STBTT_assert(x+gw < pw);
+      STBTT_assert(y+gh < ph);
+      stbtt_MakeGlyphBitmap(&f, pixels+x+y*pw, gw,gh,pw, scale,scale, g);
+      chardata[i].x0 = (stbtt_int16) x;
+      chardata[i].y0 = (stbtt_int16) y;
+      chardata[i].x1 = (stbtt_int16) (x + gw);
+      chardata[i].y1 = (stbtt_int16) (y + gh);
+      chardata[i].xadvance = scale * advance;
+      chardata[i].xoff     = (float) x0;
+      chardata[i].yoff     = (float) y0;
+      x = x + gw + 1;
+      if (y+gh+1 > bottom_y)
+         bottom_y = y+gh+1;
+   }
+   return bottom_y;
+}
+
+STBTT_DEF void stbtt_GetBakedQuad(const stbtt_bakedchar *chardata, int pw, int ph, int char_index, float *xpos, float *ypos, stbtt_aligned_quad *q, int opengl_fillrule)
+{
+   float d3d_bias = opengl_fillrule ? 0 : -0.5f;
+   float ipw = 1.0f / pw, iph = 1.0f / ph;
+   const stbtt_bakedchar *b = chardata + char_index;
+   int round_x = STBTT_ifloor((*xpos + b->xoff) + 0.5f);
+   int round_y = STBTT_ifloor((*ypos + b->yoff) + 0.5f);
+
+   q->x0 = round_x + d3d_bias;
+   q->y0 = round_y + d3d_bias;
+   q->x1 = round_x + b->x1 - b->x0 + d3d_bias;
+   q->y1 = round_y + b->y1 - b->y0 + d3d_bias;
+
+   q->s0 = b->x0 * ipw;
+   q->t0 = b->y0 * iph;
+   q->s1 = b->x1 * ipw;
+   q->t1 = b->y1 * iph;
+
+   *xpos += b->xadvance;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// rectangle packing replacement routines if you don't have stb_rect_pack.h
+//
+
+#ifndef STB_RECT_PACK_VERSION
+
+typedef int stbrp_coord;
+
+////////////////////////////////////////////////////////////////////////////////////
+//                                                                                //
+//                                                                                //
+// COMPILER WARNING ?!?!?                                                         //
+//                                                                                //
+//                                                                                //
+// if you get a compile warning due to these symbols being defined more than      //
+// once, move #include "stb_rect_pack.h" before #include "stb_truetype.h"         //
+//                                                                                //
+////////////////////////////////////////////////////////////////////////////////////
+
+typedef struct
+{
+   int width,height;
+   int x,y,bottom_y;
+} stbrp_context;
+
+typedef struct
+{
+   unsigned char x;
+} stbrp_node;
+
+struct stbrp_rect
+{
+   stbrp_coord x,y;
+   int id,w,h,was_packed;
+};
+
+static void stbrp_init_target(stbrp_context *con, int pw, int ph, stbrp_node *nodes, int num_nodes)
+{
+   con->width  = pw;
+   con->height = ph;
+   con->x = 0;
+   con->y = 0;
+   con->bottom_y = 0;
+   STBTT__NOTUSED(nodes);
+   STBTT__NOTUSED(num_nodes);
+}
+
+static void stbrp_pack_rects(stbrp_context *con, stbrp_rect *rects, int num_rects)
+{
+   int i;
+   for (i=0; i < num_rects; ++i) {
+      if (con->x + rects[i].w > con->width) {
+         con->x = 0;
+         con->y = con->bottom_y;
+      }
+      if (con->y + rects[i].h > con->height)
+         break;
+      rects[i].x = con->x;
+      rects[i].y = con->y;
+      rects[i].was_packed = 1;
+      con->x += rects[i].w;
+      if (con->y + rects[i].h > con->bottom_y)
+         con->bottom_y = con->y + rects[i].h;
+   }
+   for (   ; i < num_rects; ++i)
+      rects[i].was_packed = 0;
+}
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// bitmap baking
+//
+// This is SUPER-AWESOME (tm Ryan Gordon) packing using stb_rect_pack.h. If
+// stb_rect_pack.h isn't available, it uses the BakeFontBitmap strategy.
+
+STBTT_DEF int stbtt_PackBegin(stbtt_pack_context *spc, unsigned char *pixels, int pw, int ph, int stride_in_bytes, int padding, void *alloc_context)
+{
+   stbrp_context *context = (stbrp_context *) STBTT_malloc(sizeof(*context)            ,alloc_context);
+   int            num_nodes = pw - padding;
+   stbrp_node    *nodes   = (stbrp_node    *) STBTT_malloc(sizeof(*nodes  ) * num_nodes,alloc_context);
+
+   if (context == NULL || nodes == NULL) {
+      if (context != NULL) STBTT_free(context, alloc_context);
+      if (nodes   != NULL) STBTT_free(nodes  , alloc_context);
+      return 0;
+   }
+
+   spc->user_allocator_context = alloc_context;
+   spc->width = pw;
+   spc->height = ph;
+   spc->pixels = pixels;
+   spc->pack_info = context;
+   spc->nodes = nodes;
+   spc->padding = padding;
+   spc->stride_in_bytes = stride_in_bytes != 0 ? stride_in_bytes : pw;
+   spc->h_oversample = 1;
+   spc->v_oversample = 1;
+   spc->skip_missing = 0;
+
+   stbrp_init_target(context, pw-padding, ph-padding, nodes, num_nodes);
+
+   if (pixels)
+      STBTT_memset(pixels, 0, pw*ph); // background of 0 around pixels
+
+   return 1;
+}
+
+STBTT_DEF void stbtt_PackEnd  (stbtt_pack_context *spc)
+{
+   STBTT_free(spc->nodes    , spc->user_allocator_context);
+   STBTT_free(spc->pack_info, spc->user_allocator_context);
+}
+
+STBTT_DEF void stbtt_PackSetOversampling(stbtt_pack_context *spc, unsigned int h_oversample, unsigned int v_oversample)
+{
+   STBTT_assert(h_oversample <= STBTT_MAX_OVERSAMPLE);
+   STBTT_assert(v_oversample <= STBTT_MAX_OVERSAMPLE);
+   if (h_oversample <= STBTT_MAX_OVERSAMPLE)
+      spc->h_oversample = h_oversample;
+   if (v_oversample <= STBTT_MAX_OVERSAMPLE)
+      spc->v_oversample = v_oversample;
+}
+
+STBTT_DEF void stbtt_PackSetSkipMissingCodepoints(stbtt_pack_context *spc, int skip)
+{
+   spc->skip_missing = skip;
+}
+
+#define STBTT__OVER_MASK  (STBTT_MAX_OVERSAMPLE-1)
+
+static void stbtt__h_prefilter(unsigned char *pixels, int w, int h, int stride_in_bytes, unsigned int kernel_width)
+{
+   unsigned char buffer[STBTT_MAX_OVERSAMPLE];
+   int safe_w = w - kernel_width;
+   int j;
+   STBTT_memset(buffer, 0, STBTT_MAX_OVERSAMPLE); // suppress bogus warning from VS2013 -analyze
+   for (j=0; j < h; ++j) {
+      int i;
+      unsigned int total;
+      STBTT_memset(buffer, 0, kernel_width);
+
+      total = 0;
+
+      // make kernel_width a constant in common cases so compiler can optimize out the divide
+      switch (kernel_width) {
+         case 2:
+            for (i=0; i <= safe_w; ++i) {
+               total += pixels[i] - buffer[i & STBTT__OVER_MASK];
+               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i];
+               pixels[i] = (unsigned char) (total / 2);
+            }
+            break;
+         case 3:
+            for (i=0; i <= safe_w; ++i) {
+               total += pixels[i] - buffer[i & STBTT__OVER_MASK];
+               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i];
+               pixels[i] = (unsigned char) (total / 3);
+            }
+            break;
+         case 4:
+            for (i=0; i <= safe_w; ++i) {
+               total += pixels[i] - buffer[i & STBTT__OVER_MASK];
+               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i];
+               pixels[i] = (unsigned char) (total / 4);
+            }
+            break;
+         case 5:
+            for (i=0; i <= safe_w; ++i) {
+               total += pixels[i] - buffer[i & STBTT__OVER_MASK];
+               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i];
+               pixels[i] = (unsigned char) (total / 5);
+            }
+            break;
+         default:
+            for (i=0; i <= safe_w; ++i) {
+               total += pixels[i] - buffer[i & STBTT__OVER_MASK];
+               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i];
+               pixels[i] = (unsigned char) (total / kernel_width);
+            }
+            break;
+      }
+
+      for (; i < w; ++i) {
+         STBTT_assert(pixels[i] == 0);
+         total -= buffer[i & STBTT__OVER_MASK];
+         pixels[i] = (unsigned char) (total / kernel_width);
+      }
+
+      pixels += stride_in_bytes;
+   }
+}
+
+static void stbtt__v_prefilter(unsigned char *pixels, int w, int h, int stride_in_bytes, unsigned int kernel_width)
+{
+   unsigned char buffer[STBTT_MAX_OVERSAMPLE];
+   int safe_h = h - kernel_width;
+   int j;
+   STBTT_memset(buffer, 0, STBTT_MAX_OVERSAMPLE); // suppress bogus warning from VS2013 -analyze
+   for (j=0; j < w; ++j) {
+      int i;
+      unsigned int total;
+      STBTT_memset(buffer, 0, kernel_width);
+
+      total = 0;
+
+      // make kernel_width a constant in common cases so compiler can optimize out the divide
+      switch (kernel_width) {
+         case 2:
+            for (i=0; i <= safe_h; ++i) {
+               total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK];
+               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes];
+               pixels[i*stride_in_bytes] = (unsigned char) (total / 2);
+            }
+            break;
+         case 3:
+            for (i=0; i <= safe_h; ++i) {
+               total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK];
+               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes];
+               pixels[i*stride_in_bytes] = (unsigned char) (total / 3);
+            }
+            break;
+         case 4:
+            for (i=0; i <= safe_h; ++i) {
+               total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK];
+               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes];
+               pixels[i*stride_in_bytes] = (unsigned char) (total / 4);
+            }
+            break;
+         case 5:
+            for (i=0; i <= safe_h; ++i) {
+               total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK];
+               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes];
+               pixels[i*stride_in_bytes] = (unsigned char) (total / 5);
+            }
+            break;
+         default:
+            for (i=0; i <= safe_h; ++i) {
+               total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK];
+               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes];
+               pixels[i*stride_in_bytes] = (unsigned char) (total / kernel_width);
+            }
+            break;
+      }
+
+      for (; i < h; ++i) {
+         STBTT_assert(pixels[i*stride_in_bytes] == 0);
+         total -= buffer[i & STBTT__OVER_MASK];
+         pixels[i*stride_in_bytes] = (unsigned char) (total / kernel_width);
+      }
+
+      pixels += 1;
+   }
+}
+
+static float stbtt__oversample_shift(int oversample)
+{
+   if (!oversample)
+      return 0.0f;
+
+   // The prefilter is a box filter of width "oversample",
+   // which shifts phase by (oversample - 1)/2 pixels in
+   // oversampled space. We want to shift in the opposite
+   // direction to counter this.
+   return (float)-(oversample - 1) / (2.0f * (float)oversample);
+}
+
+// rects array must be big enough to accommodate all characters in the given ranges
+STBTT_DEF int stbtt_PackFontRangesGatherRects(stbtt_pack_context *spc, const stbtt_fontinfo *info, stbtt_pack_range *ranges, int num_ranges, stbrp_rect *rects)
+{
+   int i,j,k;
+   int missing_glyph_added = 0;
+
+   k=0;
+   for (i=0; i < num_ranges; ++i) {
+      float fh = ranges[i].font_size;
+      float scale = fh > 0 ? stbtt_ScaleForPixelHeight(info, fh) : stbtt_ScaleForMappingEmToPixels(info, -fh);
+      ranges[i].h_oversample = (unsigned char) spc->h_oversample;
+      ranges[i].v_oversample = (unsigned char) spc->v_oversample;
+      for (j=0; j < ranges[i].num_chars; ++j) {
+         int x0,y0,x1,y1;
+         int codepoint = ranges[i].array_of_unicode_codepoints == NULL ? ranges[i].first_unicode_codepoint_in_range + j : ranges[i].array_of_unicode_codepoints[j];
+         int glyph = stbtt_FindGlyphIndex(info, codepoint);
+         if (glyph == 0 && (spc->skip_missing || missing_glyph_added)) {
+            rects[k].w = rects[k].h = 0;
+         } else {
+            stbtt_GetGlyphBitmapBoxSubpixel(info,glyph,
+                                            scale * spc->h_oversample,
+                                            scale * spc->v_oversample,
+                                            0,0,
+                                            &x0,&y0,&x1,&y1);
+            rects[k].w = (stbrp_coord) (x1-x0 + spc->padding + spc->h_oversample-1);
+            rects[k].h = (stbrp_coord) (y1-y0 + spc->padding + spc->v_oversample-1);
+            if (glyph == 0)
+               missing_glyph_added = 1;
+         }
+         ++k;
+      }
+   }
+
+   return k;
+}
+
+STBTT_DEF void stbtt_MakeGlyphBitmapSubpixelPrefilter(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, float shift_x, float shift_y, int prefilter_x, int prefilter_y, float *sub_x, float *sub_y, int glyph)
+{
+   stbtt_MakeGlyphBitmapSubpixel(info,
+                                 output,
+                                 out_w - (prefilter_x - 1),
+                                 out_h - (prefilter_y - 1),
+                                 out_stride,
+                                 scale_x,
+                                 scale_y,
+                                 shift_x,
+                                 shift_y,
+                                 glyph);
+
+   if (prefilter_x > 1)
+      stbtt__h_prefilter(output, out_w, out_h, out_stride, prefilter_x);
+
+   if (prefilter_y > 1)
+      stbtt__v_prefilter(output, out_w, out_h, out_stride, prefilter_y);
+
+   *sub_x = stbtt__oversample_shift(prefilter_x);
+   *sub_y = stbtt__oversample_shift(prefilter_y);
+}
+
+// rects array must be big enough to accommodate all characters in the given ranges
+STBTT_DEF int stbtt_PackFontRangesRenderIntoRects(stbtt_pack_context *spc, const stbtt_fontinfo *info, stbtt_pack_range *ranges, int num_ranges, stbrp_rect *rects)
+{
+   int i,j,k, missing_glyph = -1, return_value = 1;
+
+   // save current values
+   int old_h_over = spc->h_oversample;
+   int old_v_over = spc->v_oversample;
+
+   k = 0;
+   for (i=0; i < num_ranges; ++i) {
+      float fh = ranges[i].font_size;
+      float scale = fh > 0 ? stbtt_ScaleForPixelHeight(info, fh) : stbtt_ScaleForMappingEmToPixels(info, -fh);
+      float recip_h,recip_v,sub_x,sub_y;
+      spc->h_oversample = ranges[i].h_oversample;
+      spc->v_oversample = ranges[i].v_oversample;
+      recip_h = 1.0f / spc->h_oversample;
+      recip_v = 1.0f / spc->v_oversample;
+      sub_x = stbtt__oversample_shift(spc->h_oversample);
+      sub_y = stbtt__oversample_shift(spc->v_oversample);
+      for (j=0; j < ranges[i].num_chars; ++j) {
+         stbrp_rect *r = &rects[k];
+         if (r->was_packed && r->w != 0 && r->h != 0) {
+            stbtt_packedchar *bc = &ranges[i].chardata_for_range[j];
+            int advance, lsb, x0,y0,x1,y1;
+            int codepoint = ranges[i].array_of_unicode_codepoints == NULL ? ranges[i].first_unicode_codepoint_in_range + j : ranges[i].array_of_unicode_codepoints[j];
+            int glyph = stbtt_FindGlyphIndex(info, codepoint);
+            stbrp_coord pad = (stbrp_coord) spc->padding;
+
+            // pad on left and top
+            r->x += pad;
+            r->y += pad;
+            r->w -= pad;
+            r->h -= pad;
+            stbtt_GetGlyphHMetrics(info, glyph, &advance, &lsb);
+            stbtt_GetGlyphBitmapBox(info, glyph,
+                                    scale * spc->h_oversample,
+                                    scale * spc->v_oversample,
+                                    &x0,&y0,&x1,&y1);
+            stbtt_MakeGlyphBitmapSubpixel(info,
+                                          spc->pixels + r->x + r->y*spc->stride_in_bytes,
+                                          r->w - spc->h_oversample+1,
+                                          r->h - spc->v_oversample+1,
+                                          spc->stride_in_bytes,
+                                          scale * spc->h_oversample,
+                                          scale * spc->v_oversample,
+                                          0,0,
+                                          glyph);
+
+            if (spc->h_oversample > 1)
+               stbtt__h_prefilter(spc->pixels + r->x + r->y*spc->stride_in_bytes,
+                                  r->w, r->h, spc->stride_in_bytes,
+                                  spc->h_oversample);
+
+            if (spc->v_oversample > 1)
+               stbtt__v_prefilter(spc->pixels + r->x + r->y*spc->stride_in_bytes,
+                                  r->w, r->h, spc->stride_in_bytes,
+                                  spc->v_oversample);
+
+            bc->x0       = (stbtt_int16)  r->x;
+            bc->y0       = (stbtt_int16)  r->y;
+            bc->x1       = (stbtt_int16) (r->x + r->w);
+            bc->y1       = (stbtt_int16) (r->y + r->h);
+            bc->xadvance =                scale * advance;
+            bc->xoff     =       (float)  x0 * recip_h + sub_x;
+            bc->yoff     =       (float)  y0 * recip_v + sub_y;
+            bc->xoff2    =                (x0 + r->w) * recip_h + sub_x;
+            bc->yoff2    =                (y0 + r->h) * recip_v + sub_y;
+
+            if (glyph == 0)
+               missing_glyph = j;
+         } else if (spc->skip_missing) {
+            return_value = 0;
+         } else if (r->was_packed && r->w == 0 && r->h == 0 && missing_glyph >= 0) {
+            ranges[i].chardata_for_range[j] = ranges[i].chardata_for_range[missing_glyph];
+         } else {
+            return_value = 0; // if any fail, report failure
+         }
+
+         ++k;
+      }
+   }
+
+   // restore original values
+   spc->h_oversample = old_h_over;
+   spc->v_oversample = old_v_over;
+
+   return return_value;
+}
+
+STBTT_DEF void stbtt_PackFontRangesPackRects(stbtt_pack_context *spc, stbrp_rect *rects, int num_rects)
+{
+   stbrp_pack_rects((stbrp_context *) spc->pack_info, rects, num_rects);
+}
+
+STBTT_DEF int stbtt_PackFontRanges(stbtt_pack_context *spc, const unsigned char *fontdata, int font_index, stbtt_pack_range *ranges, int num_ranges)
+{
+   stbtt_fontinfo info;
+   int i,j,n, return_value = 1;
+   //stbrp_context *context = (stbrp_context *) spc->pack_info;
+   stbrp_rect    *rects;
+
+   // flag all characters as NOT packed
+   for (i=0; i < num_ranges; ++i)
+      for (j=0; j < ranges[i].num_chars; ++j)
+         ranges[i].chardata_for_range[j].x0 =
+         ranges[i].chardata_for_range[j].y0 =
+         ranges[i].chardata_for_range[j].x1 =
+         ranges[i].chardata_for_range[j].y1 = 0;
+
+   n = 0;
+   for (i=0; i < num_ranges; ++i)
+      n += ranges[i].num_chars;
+
+   rects = (stbrp_rect *) STBTT_malloc(sizeof(*rects) * n, spc->user_allocator_context);
+   if (rects == NULL)
+      return 0;
+
+   info.userdata = spc->user_allocator_context;
+   stbtt_InitFont(&info, fontdata, stbtt_GetFontOffsetForIndex(fontdata,font_index));
+
+   n = stbtt_PackFontRangesGatherRects(spc, &info, ranges, num_ranges, rects);
+
+   stbtt_PackFontRangesPackRects(spc, rects, n);
+
+   return_value = stbtt_PackFontRangesRenderIntoRects(spc, &info, ranges, num_ranges, rects);
+
+   STBTT_free(rects, spc->user_allocator_context);
+   return return_value;
+}
+
+STBTT_DEF int stbtt_PackFontRange(stbtt_pack_context *spc, const unsigned char *fontdata, int font_index, float font_size,
+            int first_unicode_codepoint_in_range, int num_chars_in_range, stbtt_packedchar *chardata_for_range)
+{
+   stbtt_pack_range range;
+   range.first_unicode_codepoint_in_range = first_unicode_codepoint_in_range;
+   range.array_of_unicode_codepoints = NULL;
+   range.num_chars                   = num_chars_in_range;
+   range.chardata_for_range          = chardata_for_range;
+   range.font_size                   = font_size;
+   return stbtt_PackFontRanges(spc, fontdata, font_index, &range, 1);
+}
+
+STBTT_DEF void stbtt_GetScaledFontVMetrics(const unsigned char *fontdata, int index, float size, float *ascent, float *descent, float *lineGap)
+{
+   int i_ascent, i_descent, i_lineGap;
+   float scale;
+   stbtt_fontinfo info;
+   stbtt_InitFont(&info, fontdata, stbtt_GetFontOffsetForIndex(fontdata, index));
+   scale = size > 0 ? stbtt_ScaleForPixelHeight(&info, size) : stbtt_ScaleForMappingEmToPixels(&info, -size);
+   stbtt_GetFontVMetrics(&info, &i_ascent, &i_descent, &i_lineGap);
+   *ascent  = (float) i_ascent  * scale;
+   *descent = (float) i_descent * scale;
+   *lineGap = (float) i_lineGap * scale;
+}
+
+STBTT_DEF void stbtt_GetPackedQuad(const stbtt_packedchar *chardata, int pw, int ph, int char_index, float *xpos, float *ypos, stbtt_aligned_quad *q, int align_to_integer)
+{
+   float ipw = 1.0f / pw, iph = 1.0f / ph;
+   const stbtt_packedchar *b = chardata + char_index;
+
+   if (align_to_integer) {
+      float x = (float) STBTT_ifloor((*xpos + b->xoff) + 0.5f);
+      float y = (float) STBTT_ifloor((*ypos + b->yoff) + 0.5f);
+      q->x0 = x;
+      q->y0 = y;
+      q->x1 = x + b->xoff2 - b->xoff;
+      q->y1 = y + b->yoff2 - b->yoff;
+   } else {
+      q->x0 = *xpos + b->xoff;
+      q->y0 = *ypos + b->yoff;
+      q->x1 = *xpos + b->xoff2;
+      q->y1 = *ypos + b->yoff2;
+   }
+
+   q->s0 = b->x0 * ipw;
+   q->t0 = b->y0 * iph;
+   q->s1 = b->x1 * ipw;
+   q->t1 = b->y1 * iph;
+
+   *xpos += b->xadvance;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// sdf computation
+//
+
+#define STBTT_min(a,b)  ((a) < (b) ? (a) : (b))
+#define STBTT_max(a,b)  ((a) < (b) ? (b) : (a))
+
+static int stbtt__ray_intersect_bezier(float orig[2], float ray[2], float q0[2], float q1[2], float q2[2], float hits[2][2])
+{
+   float q0perp = q0[1]*ray[0] - q0[0]*ray[1];
+   float q1perp = q1[1]*ray[0] - q1[0]*ray[1];
+   float q2perp = q2[1]*ray[0] - q2[0]*ray[1];
+   float roperp = orig[1]*ray[0] - orig[0]*ray[1];
+
+   float a = q0perp - 2*q1perp + q2perp;
+   float b = q1perp - q0perp;
+   float c = q0perp - roperp;
+
+   float s0 = 0., s1 = 0.;
+   int num_s = 0;
+
+   if (a != 0.0) {
+      float discr = b*b - a*c;
+      if (discr > 0.0) {
+         float rcpna = -1 / a;
+         float d = (float) STBTT_sqrt(discr);
+         s0 = (b+d) * rcpna;
+         s1 = (b-d) * rcpna;
+         if (s0 >= 0.0 && s0 <= 1.0)
+            num_s = 1;
+         if (d > 0.0 && s1 >= 0.0 && s1 <= 1.0) {
+            if (num_s == 0) s0 = s1;
+            ++num_s;
+         }
+      }
+   } else {
+      // 2*b*s + c = 0
+      // s = -c / (2*b)
+      s0 = c / (-2 * b);
+      if (s0 >= 0.0 && s0 <= 1.0)
+         num_s = 1;
+   }
+
+   if (num_s == 0)
+      return 0;
+   else {
+      float rcp_len2 = 1 / (ray[0]*ray[0] + ray[1]*ray[1]);
+      float rayn_x = ray[0] * rcp_len2, rayn_y = ray[1] * rcp_len2;
+
+      float q0d =   q0[0]*rayn_x +   q0[1]*rayn_y;
+      float q1d =   q1[0]*rayn_x +   q1[1]*rayn_y;
+      float q2d =   q2[0]*rayn_x +   q2[1]*rayn_y;
+      float rod = orig[0]*rayn_x + orig[1]*rayn_y;
+
+      float q10d = q1d - q0d;
+      float q20d = q2d - q0d;
+      float q0rd = q0d - rod;
+
+      hits[0][0] = q0rd + s0*(2.0f - 2.0f*s0)*q10d + s0*s0*q20d;
+      hits[0][1] = a*s0+b;
+
+      if (num_s > 1) {
+         hits[1][0] = q0rd + s1*(2.0f - 2.0f*s1)*q10d + s1*s1*q20d;
+         hits[1][1] = a*s1+b;
+         return 2;
+      } else {
+         return 1;
+      }
+   }
+}
+
+static int equal(float *a, float *b)
+{
+   return (a[0] == b[0] && a[1] == b[1]);
+}
+
+static int stbtt__compute_crossings_x(float x, float y, int nverts, stbtt_vertex *verts)
+{
+   int i;
+   float orig[2], ray[2] = { 1, 0 };
+   float y_frac;
+   int winding = 0;
+
+   // make sure y never passes through a vertex of the shape
+   y_frac = (float) STBTT_fmod(y, 1.0f);
+   if (y_frac < 0.01f)
+      y += 0.01f;
+   else if (y_frac > 0.99f)
+      y -= 0.01f;
+
+   orig[0] = x;
+   orig[1] = y;
+
+   // test a ray from (-infinity,y) to (x,y)
+   for (i=0; i < nverts; ++i) {
+      if (verts[i].type == STBTT_vline) {
+         int x0 = (int) verts[i-1].x, y0 = (int) verts[i-1].y;
+         int x1 = (int) verts[i  ].x, y1 = (int) verts[i  ].y;
+         if (y > STBTT_min(y0,y1) && y < STBTT_max(y0,y1) && x > STBTT_min(x0,x1)) {
+            float x_inter = (y - y0) / (y1 - y0) * (x1-x0) + x0;
+            if (x_inter < x)
+               winding += (y0 < y1) ? 1 : -1;
+         }
+      }
+      if (verts[i].type == STBTT_vcurve) {
+         int x0 = (int) verts[i-1].x , y0 = (int) verts[i-1].y ;
+         int x1 = (int) verts[i  ].cx, y1 = (int) verts[i  ].cy;
+         int x2 = (int) verts[i  ].x , y2 = (int) verts[i  ].y ;
+         int ax = STBTT_min(x0,STBTT_min(x1,x2)), ay = STBTT_min(y0,STBTT_min(y1,y2));
+         int by = STBTT_max(y0,STBTT_max(y1,y2));
+         if (y > ay && y < by && x > ax) {
+            float q0[2],q1[2],q2[2];
+            float hits[2][2];
+            q0[0] = (float)x0;
+            q0[1] = (float)y0;
+            q1[0] = (float)x1;
+            q1[1] = (float)y1;
+            q2[0] = (float)x2;
+            q2[1] = (float)y2;
+            if (equal(q0,q1) || equal(q1,q2)) {
+               x0 = (int)verts[i-1].x;
+               y0 = (int)verts[i-1].y;
+               x1 = (int)verts[i  ].x;
+               y1 = (int)verts[i  ].y;
+               if (y > STBTT_min(y0,y1) && y < STBTT_max(y0,y1) && x > STBTT_min(x0,x1)) {
+                  float x_inter = (y - y0) / (y1 - y0) * (x1-x0) + x0;
+                  if (x_inter < x)
+                     winding += (y0 < y1) ? 1 : -1;
+               }
+            } else {
+               int num_hits = stbtt__ray_intersect_bezier(orig, ray, q0, q1, q2, hits);
+               if (num_hits >= 1)
+                  if (hits[0][0] < 0)
+                     winding += (hits[0][1] < 0 ? -1 : 1);
+               if (num_hits >= 2)
+                  if (hits[1][0] < 0)
+                     winding += (hits[1][1] < 0 ? -1 : 1);
+            }
+         }
+      }
+   }
+   return winding;
+}
+
+static float stbtt__cuberoot( float x )
+{
+   if (x<0)
+      return -(float) STBTT_pow(-x,1.0f/3.0f);
+   else
+      return  (float) STBTT_pow( x,1.0f/3.0f);
+}
+
+// x^3 + a*x^2 + b*x + c = 0
+static int stbtt__solve_cubic(float a, float b, float c, float* r)
+{
+   float s = -a / 3;
+   float p = b - a*a / 3;
+   float q = a * (2*a*a - 9*b) / 27 + c;
+   float p3 = p*p*p;
+   float d = q*q + 4*p3 / 27;
+   if (d >= 0) {
+      float z = (float) STBTT_sqrt(d);
+      float u = (-q + z) / 2;
+      float v = (-q - z) / 2;
+      u = stbtt__cuberoot(u);
+      v = stbtt__cuberoot(v);
+      r[0] = s + u + v;
+      return 1;
+   } else {
+      float u = (float) STBTT_sqrt(-p/3);
+      float v = (float) STBTT_acos(-STBTT_sqrt(-27/p3) * q / 2) / 3; // p3 must be negative, since d is negative
+      float m = (float) STBTT_cos(v);
+      float n = (float) STBTT_cos(v-3.141592/2)*1.732050808f;
+      r[0] = s + u * 2 * m;
+      r[1] = s - u * (m + n);
+      r[2] = s - u * (m - n);
+
+      //STBTT_assert( STBTT_fabs(((r[0]+a)*r[0]+b)*r[0]+c) < 0.05f);  // these asserts may not be safe at all scales, though they're in bezier t parameter units so maybe?
+      //STBTT_assert( STBTT_fabs(((r[1]+a)*r[1]+b)*r[1]+c) < 0.05f);
+      //STBTT_assert( STBTT_fabs(((r[2]+a)*r[2]+b)*r[2]+c) < 0.05f);
+      return 3;
+   }
+}
+
+STBTT_DEF unsigned char * stbtt_GetGlyphSDF(const stbtt_fontinfo *info, float scale, int glyph, int padding, unsigned char onedge_value, float pixel_dist_scale, int *width, int *height, int *xoff, int *yoff)
+{
+   float scale_x = scale, scale_y = scale;
+   int ix0,iy0,ix1,iy1;
+   int w,h;
+   unsigned char *data;
+
+   if (scale == 0) return NULL;
+
+   stbtt_GetGlyphBitmapBoxSubpixel(info, glyph, scale, scale, 0.0f,0.0f, &ix0,&iy0,&ix1,&iy1);
+
+   // if empty, return NULL
+   if (ix0 == ix1 || iy0 == iy1)
+      return NULL;
+
+   ix0 -= padding;
+   iy0 -= padding;
+   ix1 += padding;
+   iy1 += padding;
+
+   w = (ix1 - ix0);
+   h = (iy1 - iy0);
+
+   if (width ) *width  = w;
+   if (height) *height = h;
+   if (xoff  ) *xoff   = ix0;
+   if (yoff  ) *yoff   = iy0;
+
+   // invert for y-downwards bitmaps
+   scale_y = -scale_y;
+
+   {
+      int x,y,i,j;
+      float *precompute;
+      stbtt_vertex *verts;
+      int num_verts = stbtt_GetGlyphShape(info, glyph, &verts);
+      data = (unsigned char *) STBTT_malloc(w * h, info->userdata);
+      precompute = (float *) STBTT_malloc(num_verts * sizeof(float), info->userdata);
+
+      for (i=0,j=num_verts-1; i < num_verts; j=i++) {
+         if (verts[i].type == STBTT_vline) {
+            float x0 = verts[i].x*scale_x, y0 = verts[i].y*scale_y;
+            float x1 = verts[j].x*scale_x, y1 = verts[j].y*scale_y;
+            float dist = (float) STBTT_sqrt((x1-x0)*(x1-x0) + (y1-y0)*(y1-y0));
+            precompute[i] = (dist == 0) ? 0.0f : 1.0f / dist;
+         } else if (verts[i].type == STBTT_vcurve) {
+            float x2 = verts[j].x *scale_x, y2 = verts[j].y *scale_y;
+            float x1 = verts[i].cx*scale_x, y1 = verts[i].cy*scale_y;
+            float x0 = verts[i].x *scale_x, y0 = verts[i].y *scale_y;
+            float bx = x0 - 2*x1 + x2, by = y0 - 2*y1 + y2;
+            float len2 = bx*bx + by*by;
+            if (len2 != 0.0f)
+               precompute[i] = 1.0f / (bx*bx + by*by);
+            else
+               precompute[i] = 0.0f;
+         } else
+            precompute[i] = 0.0f;
+      }
+
+      for (y=iy0; y < iy1; ++y) {
+         for (x=ix0; x < ix1; ++x) {
+            float val;
+            float min_dist = 999999.0f;
+            float sx = (float) x + 0.5f;
+            float sy = (float) y + 0.5f;
+            float x_gspace = (sx / scale_x);
+            float y_gspace = (sy / scale_y);
+
+            int winding = stbtt__compute_crossings_x(x_gspace, y_gspace, num_verts, verts); // @OPTIMIZE: this could just be a rasterization, but needs to be line vs. non-tesselated curves so a new path
+
+            for (i=0; i < num_verts; ++i) {
+               float x0 = verts[i].x*scale_x, y0 = verts[i].y*scale_y;
+
+               if (verts[i].type == STBTT_vline && precompute[i] != 0.0f) {
+                  float x1 = verts[i-1].x*scale_x, y1 = verts[i-1].y*scale_y;
+
+                  float dist,dist2 = (x0-sx)*(x0-sx) + (y0-sy)*(y0-sy);
+                  if (dist2 < min_dist*min_dist)
+                     min_dist = (float) STBTT_sqrt(dist2);
+
+                  // coarse culling against bbox
+                  //if (sx > STBTT_min(x0,x1)-min_dist && sx < STBTT_max(x0,x1)+min_dist &&
+                  //    sy > STBTT_min(y0,y1)-min_dist && sy < STBTT_max(y0,y1)+min_dist)
+                  dist = (float) STBTT_fabs((x1-x0)*(y0-sy) - (y1-y0)*(x0-sx)) * precompute[i];
+                  STBTT_assert(i != 0);
+                  if (dist < min_dist) {
+                     // check position along line
+                     // x' = x0 + t*(x1-x0), y' = y0 + t*(y1-y0)
+                     // minimize (x'-sx)*(x'-sx)+(y'-sy)*(y'-sy)
+                     float dx = x1-x0, dy = y1-y0;
+                     float px = x0-sx, py = y0-sy;
+                     // minimize (px+t*dx)^2 + (py+t*dy)^2 = px*px + 2*px*dx*t + t^2*dx*dx + py*py + 2*py*dy*t + t^2*dy*dy
+                     // derivative: 2*px*dx + 2*py*dy + (2*dx*dx+2*dy*dy)*t, set to 0 and solve
+                     float t = -(px*dx + py*dy) / (dx*dx + dy*dy);
+                     if (t >= 0.0f && t <= 1.0f)
+                        min_dist = dist;
+                  }
+               } else if (verts[i].type == STBTT_vcurve) {
+                  float x2 = verts[i-1].x *scale_x, y2 = verts[i-1].y *scale_y;
+                  float x1 = verts[i  ].cx*scale_x, y1 = verts[i  ].cy*scale_y;
+                  float box_x0 = STBTT_min(STBTT_min(x0,x1),x2);
+                  float box_y0 = STBTT_min(STBTT_min(y0,y1),y2);
+                  float box_x1 = STBTT_max(STBTT_max(x0,x1),x2);
+                  float box_y1 = STBTT_max(STBTT_max(y0,y1),y2);
+                  // coarse culling against bbox to avoid computing cubic unnecessarily
+                  if (sx > box_x0-min_dist && sx < box_x1+min_dist && sy > box_y0-min_dist && sy < box_y1+min_dist) {
+                     int num=0;
+                     float ax = x1-x0, ay = y1-y0;
+                     float bx = x0 - 2*x1 + x2, by = y0 - 2*y1 + y2;
+                     float mx = x0 - sx, my = y0 - sy;
+                     float res[3] = {0.f,0.f,0.f};
+                     float px,py,t,it,dist2;
+                     float a_inv = precompute[i];
+                     if (a_inv == 0.0) { // if a_inv is 0, it's 2nd degree so use quadratic formula
+                        float a = 3*(ax*bx + ay*by);
+                        float b = 2*(ax*ax + ay*ay) + (mx*bx+my*by);
+                        float c = mx*ax+my*ay;
+                        if (a == 0.0) { // if a is 0, it's linear
+                           if (b != 0.0) {
+                              res[num++] = -c/b;
+                           }
+                        } else {
+                           float discriminant = b*b - 4*a*c;
+                           if (discriminant < 0)
+                              num = 0;
+                           else {
+                              float root = (float) STBTT_sqrt(discriminant);
+                              res[0] = (-b - root)/(2*a);
+                              res[1] = (-b + root)/(2*a);
+                              num = 2; // don't bother distinguishing 1-solution case, as code below will still work
+                           }
+                        }
+                     } else {
+                        float b = 3*(ax*bx + ay*by) * a_inv; // could precompute this as it doesn't depend on sample point
+                        float c = (2*(ax*ax + ay*ay) + (mx*bx+my*by)) * a_inv;
+                        float d = (mx*ax+my*ay) * a_inv;
+                        num = stbtt__solve_cubic(b, c, d, res);
+                     }
+                     dist2 = (x0-sx)*(x0-sx) + (y0-sy)*(y0-sy);
+                     if (dist2 < min_dist*min_dist)
+                        min_dist = (float) STBTT_sqrt(dist2);
+
+                     if (num >= 1 && res[0] >= 0.0f && res[0] <= 1.0f) {
+                        t = res[0], it = 1.0f - t;
+                        px = it*it*x0 + 2*t*it*x1 + t*t*x2;
+                        py = it*it*y0 + 2*t*it*y1 + t*t*y2;
+                        dist2 = (px-sx)*(px-sx) + (py-sy)*(py-sy);
+                        if (dist2 < min_dist * min_dist)
+                           min_dist = (float) STBTT_sqrt(dist2);
+                     }
+                     if (num >= 2 && res[1] >= 0.0f && res[1] <= 1.0f) {
+                        t = res[1], it = 1.0f - t;
+                        px = it*it*x0 + 2*t*it*x1 + t*t*x2;
+                        py = it*it*y0 + 2*t*it*y1 + t*t*y2;
+                        dist2 = (px-sx)*(px-sx) + (py-sy)*(py-sy);
+                        if (dist2 < min_dist * min_dist)
+                           min_dist = (float) STBTT_sqrt(dist2);
+                     }
+                     if (num >= 3 && res[2] >= 0.0f && res[2] <= 1.0f) {
+                        t = res[2], it = 1.0f - t;
+                        px = it*it*x0 + 2*t*it*x1 + t*t*x2;
+                        py = it*it*y0 + 2*t*it*y1 + t*t*y2;
+                        dist2 = (px-sx)*(px-sx) + (py-sy)*(py-sy);
+                        if (dist2 < min_dist * min_dist)
+                           min_dist = (float) STBTT_sqrt(dist2);
+                     }
+                  }
+               }
+            }
+            if (winding == 0)
+               min_dist = -min_dist;  // if outside the shape, value is negative
+            val = onedge_value + pixel_dist_scale * min_dist;
+            if (val < 0)
+               val = 0;
+            else if (val > 255)
+               val = 255;
+            data[(y-iy0)*w+(x-ix0)] = (unsigned char) val;
+         }
+      }
+      STBTT_free(precompute, info->userdata);
+      STBTT_free(verts, info->userdata);
+   }
+   return data;
+}
+
+STBTT_DEF unsigned char * stbtt_GetCodepointSDF(const stbtt_fontinfo *info, float scale, int codepoint, int padding, unsigned char onedge_value, float pixel_dist_scale, int *width, int *height, int *xoff, int *yoff)
+{
+   return stbtt_GetGlyphSDF(info, scale, stbtt_FindGlyphIndex(info, codepoint), padding, onedge_value, pixel_dist_scale, width, height, xoff, yoff);
+}
+
+STBTT_DEF void stbtt_FreeSDF(unsigned char *bitmap, void *userdata)
+{
+   STBTT_free(bitmap, userdata);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// font name matching -- recommended not to use this
+//
+
+// check if a utf8 string contains a prefix which is the utf16 string; if so return length of matching utf8 string
+static stbtt_int32 stbtt__CompareUTF8toUTF16_bigendian_prefix(stbtt_uint8 *s1, stbtt_int32 len1, stbtt_uint8 *s2, stbtt_int32 len2)
+{
+   stbtt_int32 i=0;
+
+   // convert utf16 to utf8 and compare the results while converting
+   while (len2) {
+      stbtt_uint16 ch = s2[0]*256 + s2[1];
+      if (ch < 0x80) {
+         if (i >= len1) return -1;
+         if (s1[i++] != ch) return -1;
+      } else if (ch < 0x800) {
+         if (i+1 >= len1) return -1;
+         if (s1[i++] != 0xc0 + (ch >> 6)) return -1;
+         if (s1[i++] != 0x80 + (ch & 0x3f)) return -1;
+      } else if (ch >= 0xd800 && ch < 0xdc00) {
+         stbtt_uint32 c;
+         stbtt_uint16 ch2 = s2[2]*256 + s2[3];
+         if (i+3 >= len1) return -1;
+         c = ((ch - 0xd800) << 10) + (ch2 - 0xdc00) + 0x10000;
+         if (s1[i++] != 0xf0 + (c >> 18)) return -1;
+         if (s1[i++] != 0x80 + ((c >> 12) & 0x3f)) return -1;
+         if (s1[i++] != 0x80 + ((c >>  6) & 0x3f)) return -1;
+         if (s1[i++] != 0x80 + ((c      ) & 0x3f)) return -1;
+         s2 += 2; // plus another 2 below
+         len2 -= 2;
+      } else if (ch >= 0xdc00 && ch < 0xe000) {
+         return -1;
+      } else {
+         if (i+2 >= len1) return -1;
+         if (s1[i++] != 0xe0 + (ch >> 12)) return -1;
+         if (s1[i++] != 0x80 + ((ch >> 6) & 0x3f)) return -1;
+         if (s1[i++] != 0x80 + ((ch     ) & 0x3f)) return -1;
+      }
+      s2 += 2;
+      len2 -= 2;
+   }
+   return i;
+}
+
+static int stbtt_CompareUTF8toUTF16_bigendian_internal(char *s1, int len1, char *s2, int len2)
+{
+   return len1 == stbtt__CompareUTF8toUTF16_bigendian_prefix((stbtt_uint8*) s1, len1, (stbtt_uint8*) s2, len2);
+}
+
+// returns results in whatever encoding you request... but note that 2-byte encodings
+// will be BIG-ENDIAN... use stbtt_CompareUTF8toUTF16_bigendian() to compare
+STBTT_DEF const char *stbtt_GetFontNameString(const stbtt_fontinfo *font, int *length, int platformID, int encodingID, int languageID, int nameID)
+{
+   stbtt_int32 i,count,stringOffset;
+   stbtt_uint8 *fc = font->data;
+   stbtt_uint32 offset = font->fontstart;
+   stbtt_uint32 nm = stbtt__find_table(fc, offset, "name");
+   if (!nm) return NULL;
+
+   count = ttUSHORT(fc+nm+2);
+   stringOffset = nm + ttUSHORT(fc+nm+4);
+   for (i=0; i < count; ++i) {
+      stbtt_uint32 loc = nm + 6 + 12 * i;
+      if (platformID == ttUSHORT(fc+loc+0) && encodingID == ttUSHORT(fc+loc+2)
+          && languageID == ttUSHORT(fc+loc+4) && nameID == ttUSHORT(fc+loc+6)) {
+         *length = ttUSHORT(fc+loc+8);
+         return (const char *) (fc+stringOffset+ttUSHORT(fc+loc+10));
+      }
+   }
+   return NULL;
+}
+
+static int stbtt__matchpair(stbtt_uint8 *fc, stbtt_uint32 nm, stbtt_uint8 *name, stbtt_int32 nlen, stbtt_int32 target_id, stbtt_int32 next_id)
+{
+   stbtt_int32 i;
+   stbtt_int32 count = ttUSHORT(fc+nm+2);
+   stbtt_int32 stringOffset = nm + ttUSHORT(fc+nm+4);
+
+   for (i=0; i < count; ++i) {
+      stbtt_uint32 loc = nm + 6 + 12 * i;
+      stbtt_int32 id = ttUSHORT(fc+loc+6);
+      if (id == target_id) {
+         // find the encoding
+         stbtt_int32 platform = ttUSHORT(fc+loc+0), encoding = ttUSHORT(fc+loc+2), language = ttUSHORT(fc+loc+4);
+
+         // is this a Unicode encoding?
+         if (platform == 0 || (platform == 3 && encoding == 1) || (platform == 3 && encoding == 10)) {
+            stbtt_int32 slen = ttUSHORT(fc+loc+8);
+            stbtt_int32 off = ttUSHORT(fc+loc+10);
+
+            // check if there's a prefix match
+            stbtt_int32 matchlen = stbtt__CompareUTF8toUTF16_bigendian_prefix(name, nlen, fc+stringOffset+off,slen);
+            if (matchlen >= 0) {
+               // check for target_id+1 immediately following, with same encoding & language
+               if (i+1 < count && ttUSHORT(fc+loc+12+6) == next_id && ttUSHORT(fc+loc+12) == platform && ttUSHORT(fc+loc+12+2) == encoding && ttUSHORT(fc+loc+12+4) == language) {
+                  slen = ttUSHORT(fc+loc+12+8);
+                  off = ttUSHORT(fc+loc+12+10);
+                  if (slen == 0) {
+                     if (matchlen == nlen)
+                        return 1;
+                  } else if (matchlen < nlen && name[matchlen] == ' ') {
+                     ++matchlen;
+                     if (stbtt_CompareUTF8toUTF16_bigendian_internal((char*) (name+matchlen), nlen-matchlen, (char*)(fc+stringOffset+off),slen))
+                        return 1;
+                  }
+               } else {
+                  // if nothing immediately following
+                  if (matchlen == nlen)
+                     return 1;
+               }
+            }
+         }
+
+         // @TODO handle other encodings
+      }
+   }
+   return 0;
+}
+
+static int stbtt__matches(stbtt_uint8 *fc, stbtt_uint32 offset, stbtt_uint8 *name, stbtt_int32 flags)
+{
+   stbtt_int32 nlen = (stbtt_int32) STBTT_strlen((char *) name);
+   stbtt_uint32 nm,hd;
+   if (!stbtt__isfont(fc+offset)) return 0;
+
+   // check italics/bold/underline flags in macStyle...
+   if (flags) {
+      hd = stbtt__find_table(fc, offset, "head");
+      if ((ttUSHORT(fc+hd+44) & 7) != (flags & 7)) return 0;
+   }
+
+   nm = stbtt__find_table(fc, offset, "name");
+   if (!nm) return 0;
+
+   if (flags) {
+      // if we checked the macStyle flags, then just check the family and ignore the subfamily
+      if (stbtt__matchpair(fc, nm, name, nlen, 16, -1))  return 1;
+      if (stbtt__matchpair(fc, nm, name, nlen,  1, -1))  return 1;
+      if (stbtt__matchpair(fc, nm, name, nlen,  3, -1))  return 1;
+   } else {
+      if (stbtt__matchpair(fc, nm, name, nlen, 16, 17))  return 1;
+      if (stbtt__matchpair(fc, nm, name, nlen,  1,  2))  return 1;
+      if (stbtt__matchpair(fc, nm, name, nlen,  3, -1))  return 1;
+   }
+
+   return 0;
+}
+
+static int stbtt_FindMatchingFont_internal(unsigned char *font_collection, char *name_utf8, stbtt_int32 flags)
+{
+   stbtt_int32 i;
+   for (i=0;;++i) {
+      stbtt_int32 off = stbtt_GetFontOffsetForIndex(font_collection, i);
+      if (off < 0) return off;
+      if (stbtt__matches((stbtt_uint8 *) font_collection, off, (stbtt_uint8*) name_utf8, flags))
+         return off;
+   }
+}
+
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
+STBTT_DEF int stbtt_BakeFontBitmap(const unsigned char *data, int offset,
+                                float pixel_height, unsigned char *pixels, int pw, int ph,
+                                int first_char, int num_chars, stbtt_bakedchar *chardata)
+{
+   return stbtt_BakeFontBitmap_internal((unsigned char *) data, offset, pixel_height, pixels, pw, ph, first_char, num_chars, chardata);
+}
+
+STBTT_DEF int stbtt_GetFontOffsetForIndex(const unsigned char *data, int index)
+{
+   return stbtt_GetFontOffsetForIndex_internal((unsigned char *) data, index);
+}
+
+STBTT_DEF int stbtt_GetNumberOfFonts(const unsigned char *data)
+{
+   return stbtt_GetNumberOfFonts_internal((unsigned char *) data);
+}
+
+STBTT_DEF int stbtt_InitFont(stbtt_fontinfo *info, const unsigned char *data, int offset)
+{
+   return stbtt_InitFont_internal(info, (unsigned char *) data, offset);
+}
+
+STBTT_DEF int stbtt_FindMatchingFont(const unsigned char *fontdata, const char *name, int flags)
+{
+   return stbtt_FindMatchingFont_internal((unsigned char *) fontdata, (char *) name, flags);
+}
+
+STBTT_DEF int stbtt_CompareUTF8toUTF16_bigendian(const char *s1, int len1, const char *s2, int len2)
+{
+   return stbtt_CompareUTF8toUTF16_bigendian_internal((char *) s1, len1, (char *) s2, len2);
+}
+
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
+
+#endif // STB_TRUETYPE_IMPLEMENTATION
+
+
+// FULL VERSION HISTORY
+//
+//   1.25 (2021-07-11) many fixes
+//   1.24 (2020-02-05) fix warning
+//   1.23 (2020-02-02) query SVG data for glyphs; query whole kerning table (but only kern not GPOS)
+//   1.22 (2019-08-11) minimize missing-glyph duplication; fix kerning if both 'GPOS' and 'kern' are defined
+//   1.21 (2019-02-25) fix warning
+//   1.20 (2019-02-07) PackFontRange skips missing codepoints; GetScaleFontVMetrics()
+//   1.19 (2018-02-11) OpenType GPOS kerning (horizontal only), STBTT_fmod
+//   1.18 (2018-01-29) add missing function
+//   1.17 (2017-07-23) make more arguments const; doc fix
+//   1.16 (2017-07-12) SDF support
+//   1.15 (2017-03-03) make more arguments const
+//   1.14 (2017-01-16) num-fonts-in-TTC function
+//   1.13 (2017-01-02) support OpenType fonts, certain Apple fonts
+//   1.12 (2016-10-25) suppress warnings about casting away const with -Wcast-qual
+//   1.11 (2016-04-02) fix unused-variable warning
+//   1.10 (2016-04-02) allow user-defined fabs() replacement
+//                     fix memory leak if fontsize=0.0
+//                     fix warning from duplicate typedef
+//   1.09 (2016-01-16) warning fix; avoid crash on outofmem; use alloc userdata for PackFontRanges
+//   1.08 (2015-09-13) document stbtt_Rasterize(); fixes for vertical & horizontal edges
+//   1.07 (2015-08-01) allow PackFontRanges to accept arrays of sparse codepoints;
+//                     allow PackFontRanges to pack and render in separate phases;
+//                     fix stbtt_GetFontOFfsetForIndex (never worked for non-0 input?);
+//                     fixed an assert() bug in the new rasterizer
+//                     replace assert() with STBTT_assert() in new rasterizer
+//   1.06 (2015-07-14) performance improvements (~35% faster on x86 and x64 on test machine)
+//                     also more precise AA rasterizer, except if shapes overlap
+//                     remove need for STBTT_sort
+//   1.05 (2015-04-15) fix misplaced definitions for STBTT_STATIC
+//   1.04 (2015-04-15) typo in example
+//   1.03 (2015-04-12) STBTT_STATIC, fix memory leak in new packing, various fixes
+//   1.02 (2014-12-10) fix various warnings & compile issues w/ stb_rect_pack, C++
+//   1.01 (2014-12-08) fix subpixel position when oversampling to exactly match
+//                        non-oversampled; STBTT_POINT_SIZE for packed case only
+//   1.00 (2014-12-06) add new PackBegin etc. API, w/ support for oversampling
+//   0.99 (2014-09-18) fix multiple bugs with subpixel rendering (ryg)
+//   0.9  (2014-08-07) support certain mac/iOS fonts without an MS platformID
+//   0.8b (2014-07-07) fix a warning
+//   0.8  (2014-05-25) fix a few more warnings
+//   0.7  (2013-09-25) bugfix: subpixel glyph bug fixed in 0.5 had come back
+//   0.6c (2012-07-24) improve documentation
+//   0.6b (2012-07-20) fix a few more warnings
+//   0.6  (2012-07-17) fix warnings; added stbtt_ScaleForMappingEmToPixels,
+//                        stbtt_GetFontBoundingBox, stbtt_IsGlyphEmpty
+//   0.5  (2011-12-09) bugfixes:
+//                        subpixel glyph renderer computed wrong bounding box
+//                        first vertex of shape can be off-curve (FreeSans)
+//   0.4b (2011-12-03) fixed an error in the font baking example
+//   0.4  (2011-12-01) kerning, subpixel rendering (tor)
+//                    bugfixes for:
+//                        codepoint-to-glyph conversion using table fmt=12
+//                        codepoint-to-glyph conversion using table fmt=4
+//                        stbtt_GetBakedQuad with non-square texture (Zer)
+//                    updated Hello World! sample to use kerning and subpixel
+//                    fixed some warnings
+//   0.3  (2009-06-24) cmap fmt=12, compound shapes (MM)
+//                    userdata, malloc-from-userdata, non-zero fill (stb)
+//   0.2  (2009-03-11) Fix unsigned/signed char warnings
+//   0.1  (2009-03-09) First public release
+//
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
--- /dev/null
+++ b/include-demo/stb_voxel_render.h
@@ -1,0 +1,3807 @@
+// stb_voxel_render.h - v0.89 - Sean Barrett, 2015 - public domain
+//
+// This library helps render large-scale "voxel" worlds for games,
+// in this case, one with blocks that can have textures and that
+// can also be a few shapes other than cubes.
+//
+//    Video introduction:
+//       http://www.youtube.com/watch?v=2vnTtiLrV1w
+//
+//    Minecraft-viewer sample app (not very simple though):
+//       http://github.com/nothings/stb/tree/master/tests/caveview
+//
+// It works by creating triangle meshes. The library includes
+//
+//    - converter from dense 3D arrays of block info to vertex mesh
+//    - vertex & fragment shaders for the vertex mesh
+//    - assistance in setting up shader state
+//
+// For portability, none of the library code actually accesses
+// the 3D graphics API. (At the moment, it's not actually portable
+// since the shaders are GLSL only, but patches are welcome.)
+//
+// You have to do all the caching and tracking of vertex buffers
+// yourself. However, you could also try making a game with
+// a small enough world that it's fully loaded rather than
+// streaming. Currently the preferred vertex format is 20 bytes
+// per quad. There are designs to allow much more compact formats
+// with a slight reduction in shader features, but no roadmap
+// for actually implementing them.
+//
+//
+// USAGE
+//
+//   #define the symbol STB_VOXEL_RENDER_IMPLEMENTATION in *one*
+//   C/C++ file before the #include of this file; the implementation
+//   will be generated in that file.
+//
+//   If you define the symbols STB_VOXEL_RENDER_STATIC, then the
+//   implementation will be private to that file.
+//
+//
+// FEATURES
+//
+//   - you can choose textured blocks with the features below,
+//     or colored voxels with 2^24 colors and no textures.
+//
+//   - voxels are mostly just cubes, but there's support for
+//     half-height cubes and diagonal slopes, half-height
+//     diagonals, and even odder shapes especially for doing
+//     more-continuous "ground".
+//
+//   - texture coordinates are projections along one of the major
+//     axes, with the per-texture scaling.
+//
+//   - a number of aspects of the shader and the vertex format
+//     are configurable; the library generally takes care of
+//     coordinating the vertex format with the mesh for you.
+//
+//
+// FEATURES (SHADER PERSPECTIVE)
+//
+//   - vertices aligned on integer lattice, z on multiples of 0.5
+//   - per-vertex "lighting" or "ambient occlusion" value (6 bits)
+//   - per-vertex texture crossfade (3 bits)
+//
+//   - per-face texture #1 id (8-bit index into array texture)
+//   - per-face texture #2 id (8-bit index into second array texture)
+//   - per-face color (6-bit palette index, 2 bits of per-texture boolean enable)
+//   - per-face 5-bit normal for lighting calculations & texture coord computation
+//   - per-face 2-bit texture matrix rotation to rotate faces
+//
+//   - indexed-by-texture-id scale factor (separate for texture #1 and texture #2)
+//   - indexed-by-texture-#2-id blend mode (alpha composite or modulate/multiply);
+//     the first is good for decals, the second for detail textures, "light maps",
+//     etc; both modes are controlled by texture #2's alpha, scaled by the
+//     per-vertex texture crossfade and the per-face color (if enabled on texture #2);
+//     modulate/multiply multiplies by an extra factor of 2.0 so that if you
+//     make detail maps whose average brightness is 0.5 everything works nicely.
+//
+//   - ambient lighting: half-lambert directional plus constant, all scaled by vertex ao
+//   - face can be fullbright (emissive), controlled by per-face color
+//   - installable lighting, with default single-point-light
+//   - installable fog, with default hacked smoothstep
+//
+//  Note that all the variations of lighting selection and texture
+//  blending are run-time conditions in the shader, so they can be
+//  intermixed in a single mesh.
+//
+//
+// INTEGRATION ARC
+//
+//   The way to get this library to work from scratch is to do the following:
+//
+//      Step 1. define STBVOX_CONFIG_MODE to 0
+//
+//        This mode uses only vertex attributes and uniforms, and is easiest
+//        to get working. It requires 32 bytes per quad and limits the
+//        size of some tables to avoid hitting uniform limits.
+//
+//      Step 2. define STBVOX_CONFIG_MODE to 1
+//
+//        This requires using a texture buffer to store the quad data,
+//        reducing the size to 20 bytes per quad.
+//
+//      Step 3: define STBVOX_CONFIG_PREFER_TEXBUFFER
+//
+//        This causes some uniforms to be stored as texture buffers
+//        instead. This increases the size of some of those tables,
+//        and avoids a potential slow path (gathering non-uniform
+//        data from uniforms) on some hardware.
+//
+//   In the future I might add additional modes that have significantly
+//   smaller meshes but reduce features, down as small as 6 bytes per quad.
+//   See elsewhere in this file for a table of candidate modes. Switching
+//   to a mode will require changing some of your mesh creation code, but
+//   everything else should be seamless. (And I'd like to change the API
+//   so that mesh creation is data-driven the way the uniforms are, and
+//   then you wouldn't even have to change anything but the mode number.)
+//
+//
+// IMPROVEMENTS FOR SHIP-WORTHY PROGRAMS USING THIS LIBRARY
+//
+//   I currently tolerate a certain level of "bugginess" in this library.
+//
+//   I'm referring to things which look a little wrong (as long as they
+//   don't cause holes or cracks in the output meshes), or things which
+//   do not produce as optimal a mesh as possible. Notable examples:
+//
+//        -  incorrect lighting on slopes
+//        -  inefficient meshes for vheight blocks
+//
+//   I am willing to do the work to improve these things if someone is
+//   going to ship a substantial program that would be improved by them.
+//   (It need not be commercial, nor need it be a game.) I just didn't
+//   want to do the work up front if it might never be leveraged. So just
+//   submit a bug report as usual (github is preferred), but add a note
+//   that this is for a thing that is really going to ship. (That means
+//   you need to be far enough into the project that it's clear you're
+//   committed to it; not during early exploratory development.)
+//
+//
+// VOXEL MESH API
+//
+//   Context
+//
+//     To understand the API, make sure you first understand the feature set
+//     listed above.
+//
+//     Because the vertices are compact, they have very limited spatial
+//     precision. Thus a single mesh can only contain the data for a limited
+//     area. To make very large voxel maps, you'll need to build multiple
+//     vertex buffers. (But you want this anyway for frustum culling.)
+//
+//     Each generated mesh has three components:
+//             - vertex data (vertex buffer)
+//             - face data (optional, stored in texture buffer)
+//             - mesh transform (uniforms)
+//
+//     Once you've generated the mesh with this library, it's up to you
+//     to upload it to the GPU, to keep track of the state, and to render
+//     it.
+//
+//   Concept
+//
+//     The basic design is that you pass in one or more 3D arrays; each array
+//     is (typically) one-byte-per-voxel and contains information about one
+//     or more properties of some particular voxel property.
+//
+//     Because there is so much per-vertex and per-face data possible
+//     in the output, and each voxel can have 6 faces and 8 vertices, it
+//     would require an very large data structure to describe all
+//     of the possibilities, and this would cause the mesh-creation
+//     process to be slow. Instead, the API provides multiple ways
+//     to express each property, some more compact, others less so;
+//     each such way has some limitations on what it can express.
+//
+//     Note that there are so many paths and combinations, not all of them
+//     have been tested. Just report bugs and I'll fix 'em.
+//
+//   Details
+//
+//     See the API documentation in the header-file section.
+//
+//
+// CONTRIBUTORS
+//
+//   Features             Porting            Bugfixes & Warnings
+//  Sean Barrett                          github:r-leyh   Jesus Fernandez
+//                                        Miguel Lechon   github:Arbeiterunfallversicherungsgesetz
+//                                        Thomas Frase    James Hofmann
+//                                        Stephen Olsen   github:guitarfreak
+//
+// VERSION HISTORY
+//
+//   0.89   (2020-02-02)  bugfix in sample code
+//   0.88   (2019-03-04)  fix warnings
+//   0.87   (2019-02-25)  fix warning
+//   0.86   (2019-02-07)  fix typos in comments
+//   0.85   (2017-03-03)  add block_selector (by guitarfreak)
+//   0.84   (2016-04-02)  fix GLSL syntax error on glModelView path
+//   0.83   (2015-09-13)  remove non-constant struct initializers to support more compilers
+//   0.82   (2015-08-01)  added input.packed_compact to store rot, vheight & texlerp efficiently
+//                        fix broken tex_overlay2
+//   0.81   (2015-05-28)  fix broken STBVOX_CONFIG_OPTIMIZED_VHEIGHT
+//   0.80   (2015-04-11)  fix broken STBVOX_CONFIG_ROTATION_IN_LIGHTING refactoring
+//                        change STBVOX_MAKE_LIGHTING to STBVOX_MAKE_LIGHTING_EXT so
+//                                    that header defs don't need to see config vars
+//                        add STBVOX_CONFIG_VHEIGHT_IN_LIGHTING and other vheight fixes
+//                        added documentation for vheight ("weird slopes")
+//   0.79   (2015-04-01)  fix the missing types from 0.78; fix string constants being const
+//   0.78   (2015-04-02)  bad "#else", compile as C++
+//   0.77   (2015-04-01)  documentation tweaks, rename config var to STB_VOXEL_RENDER_STATIC
+//   0.76   (2015-04-01)  typos, signed/unsigned shader issue, more documentation
+//   0.75   (2015-04-01)  initial release
+//
+//
+// HISTORICAL FOUNDATION
+//
+//   stb_voxel_render   20-byte quads   2015/01
+//   zmc engine         32-byte quads   2013/12
+//   zmc engine         96-byte quads   2011/10
+//
+//
+// LICENSE
+//
+//   See end of file for license information.
+
+#ifndef INCLUDE_STB_VOXEL_RENDER_H
+#define INCLUDE_STB_VOXEL_RENDER_H
+
+#include <stdlib.h>
+
+typedef struct stbvox_mesh_maker stbvox_mesh_maker;
+typedef struct stbvox_input_description stbvox_input_description;
+
+#ifdef STB_VOXEL_RENDER_STATIC
+#define STBVXDEC static
+#else
+#define STBVXDEC extern
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// CONFIGURATION MACROS
+//
+//  #define STBVOX_CONFIG_MODE <integer>           // REQUIRED
+//     Configures the overall behavior of stb_voxel_render. This
+//     can affect the shaders, the uniform info, and other things.
+//     (If you need more than one mode in the same app, you can
+//     use STB_VOXEL_RENDER_STATIC to create multiple versions
+//     in separate files, and then wrap them.)
+//
+//         Mode value       Meaning
+//             0               Textured blocks, 32-byte quads
+//             1               Textured blocks, 20-byte quads
+//            20               Untextured blocks, 32-byte quads
+//            21               Untextured blocks, 20-byte quads
+//
+//
+//  #define STBVOX_CONFIG_PRECISION_Z  <integer>   // OPTIONAL
+//     Defines the number of bits of fractional position for Z.
+//     Only 0 or 1 are valid. 1 is the default. If 0, then a
+//     single mesh has twice the legal Z range; e.g. in
+//     modes 0,1,20,21, Z in the mesh can extend to 511 instead
+//     of 255. However, half-height blocks cannot be used.
+//
+// All of the following are just #ifdef tested so need no values, and are optional.
+//
+//    STBVOX_CONFIG_BLOCKTYPE_SHORT
+//        use unsigned 16-bit values for 'blocktype' in the input instead of 8-bit values
+//
+//    STBVOX_CONFIG_OPENGL_MODELVIEW
+//        use the gl_ModelView matrix rather than the explicit uniform
+//
+//    STBVOX_CONFIG_HLSL
+//        NOT IMPLEMENTED! Define HLSL shaders instead of GLSL shaders
+//
+//    STBVOX_CONFIG_PREFER_TEXBUFFER
+//        Stores many of the uniform arrays in texture buffers instead,
+//        so they can be larger and may be more efficient on some hardware.
+//
+//    STBVOX_CONFIG_LIGHTING_SIMPLE
+//        Creates a simple lighting engine with a single point light source
+//        in addition to the default half-lambert ambient light.
+//
+//    STBVOX_CONFIG_LIGHTING
+//        Declares a lighting function hook; you must append a lighting function
+//        to the shader before compiling it:
+//            vec3 compute_lighting(vec3 pos, vec3 norm, vec3 albedo, vec3 ambient);
+//        'ambient' is the half-lambert ambient light with vertex ambient-occlusion applied
+//
+//    STBVOX_CONFIG_FOG_SMOOTHSTEP
+//        Defines a simple unrealistic fog system designed to maximize
+//        unobscured view distance while not looking too weird when things
+//        emerge from the fog. Configured using an extra array element
+//        in the STBVOX_UNIFORM_ambient uniform.
+//
+//    STBVOX_CONFIG_FOG
+//        Defines a fog function hook; you must append a fog function to
+//        the shader before compiling it:
+//            vec3 compute_fog(vec3 color, vec3 relative_pos, float fragment_alpha);
+//        "color" is the incoming pre-fogged color, fragment_alpha is the alpha value,
+//        and relative_pos is the vector from the point to the camera in worldspace
+//
+//    STBVOX_CONFIG_DISABLE_TEX2
+//        This disables all processing of texture 2 in the shader in case
+//        you don't use it. Eventually this could be replaced with a mode
+//        that omits the unused data entirely.
+//
+//    STBVOX_CONFIG_TEX1_EDGE_CLAMP
+//    STBVOX_CONFIG_TEX2_EDGE_CLAMP
+//        If you want to edge clamp the textures, instead of letting them wrap,
+//        set this flag. By default stb_voxel_render relies on texture wrapping
+//        to simplify texture coordinate generation. This flag forces it to do
+//        it correctly, although there can still be minor artifacts.
+//
+//    STBVOX_CONFIG_ROTATION_IN_LIGHTING
+//        Changes the meaning of the 'lighting' mesher input variable to also
+//        store the rotation; see later discussion.
+//
+//    STBVOX_CONFIG_VHEIGHT_IN_LIGHTING
+//        Changes the meaning of the 'lighting' mesher input variable to also
+//        store the vheight; see later discussion. Cannot use both this and
+//        the previous variable.
+//
+//    STBVOX_CONFIG_PREMULTIPLIED_ALPHA
+//        Adjusts the shader calculations on the assumption that tex1.rgba,
+//        tex2.rgba, and color.rgba all use premultiplied values, and that
+//        the output of the fragment shader should be premultiplied.
+//
+//    STBVOX_CONFIG_UNPREMULTIPLY
+//        Only meaningful if STBVOX_CONFIG_PREMULTIPLIED_ALPHA is defined.
+//        Changes the behavior described above so that the inputs are
+//        still premultiplied alpha, but the output of the fragment
+//        shader is not premultiplied alpha. This is needed when allowing
+//        non-unit alpha values but not doing alpha-blending (for example
+//        when alpha testing).
+//
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// MESHING
+//
+// A mesh represents a (typically) small chunk of a larger world.
+// Meshes encode coordinates using small integers, so those
+// coordinates must be relative to some base location.
+// All of the coordinates in the functions below use
+// these relative coordinates unless explicitly stated
+// otherwise.
+//
+// Input to the meshing step is documented further down
+
+STBVXDEC void stbvox_init_mesh_maker(stbvox_mesh_maker *mm);
+// Call this function to initialize a mesh-maker context structure
+// used to build meshes. You should have one context per thread
+// that's building meshes.
+
+STBVXDEC void stbvox_set_buffer(stbvox_mesh_maker *mm, int mesh, int slot, void *buffer, size_t len);
+// Call this to set the buffer into which stbvox will write the mesh
+// it creates. It can build more than one mesh in parallel (distinguished
+// by the 'mesh' parameter), and each mesh can be made up of more than
+// one buffer (distinguished by the 'slot' parameter).
+//
+// Multiple meshes are under your control; use the 'selector' input
+// variable to choose which mesh each voxel's vertices are written to.
+// For example, you can use this to generate separate meshes for opaque
+// and transparent data.
+//
+// You can query the number of slots by calling stbvox_get_buffer_count
+// described below. The meaning of the buffer for each slot depends
+// on STBVOX_CONFIG_MODE.
+//
+//   In mode 0 & mode 20, there is only one slot. The mesh data for that
+//   slot is two interleaved vertex attributes: attr_vertex, a single
+//   32-bit uint, and attr_face, a single 32-bit uint.
+//
+//   In mode 1 & mode 21, there are two slots. The first buffer should
+//   be four times as large as the second buffer. The first buffer
+//   contains a single vertex attribute: 'attr_vertex', a single 32-bit uint.
+//   The second buffer contains texture buffer data (an array of 32-bit uints)
+//   that will be accessed through the sampler identified by STBVOX_UNIFORM_face_data.
+
+STBVXDEC int stbvox_get_buffer_count(stbvox_mesh_maker *mm);
+// Returns the number of buffers needed per mesh as described above.
+
+STBVXDEC int stbvox_get_buffer_size_per_quad(stbvox_mesh_maker *mm, int slot);
+// Returns how much of a given buffer will get used per quad. This
+// allows you to choose correct relative sizes for each buffer, although
+// the values are fixed based on the configuration you've selected at
+// compile time, and the details are described in stbvox_set_buffer.
+
+STBVXDEC void stbvox_set_default_mesh(stbvox_mesh_maker *mm, int mesh);
+// Selects which mesh the mesher will output to (see previous function)
+// if the input doesn't specify a per-voxel selector. (I doubt this is
+// useful, but it's here just in case.)
+
+STBVXDEC stbvox_input_description *stbvox_get_input_description(stbvox_mesh_maker *mm);
+// This function call returns a pointer to the stbvox_input_description part
+// of stbvox_mesh_maker (which you should otherwise treat as opaque). You
+// zero this structure, then fill out the relevant pointers to the data
+// describing your voxel object/world.
+//
+// See further documentation at the description of stbvox_input_description below.
+
+STBVXDEC void stbvox_set_input_stride(stbvox_mesh_maker *mm, int x_stride_in_elements, int y_stride_in_elements);
+// This sets the stride between successive elements of the 3D arrays
+// in the stbvox_input_description. Z values are always stored consecutively.
+// (The preferred coordinate system for stbvox is X right, Y forwards, Z up.)
+
+STBVXDEC void stbvox_set_input_range(stbvox_mesh_maker *mm, int x0, int y0, int z0, int x1, int y1, int z1);
+// This sets the range of values in the 3D array for the voxels that
+// the mesh generator will convert. The lower values are inclusive,
+// the higher values are exclusive, so (0,0,0) to (16,16,16) generates
+// mesh data associated with voxels up to (15,15,15) but no higher.
+//
+// The mesh generate generates faces at the boundary between open space
+// and solid space but associates them with the solid space, so if (15,0,0)
+// is open and (16,0,0) is solid, then the mesh will contain the boundary
+// between them if x0 <= 16 and x1 > 16.
+//
+// Note that the mesh generator will access array elements 1 beyond the
+// limits set in these parameters. For example, if you set the limits
+// to be (0,0,0) and (16,16,16), then the generator will access all of
+// the voxels between (-1,-1,-1) and (16,16,16), including (16,16,16).
+// You may have to do pointer arithmetic to make it work.
+//
+// For example, caveview processes mesh chunks that are 32x32x16, but it
+// does this using input buffers that are 34x34x18.
+//
+// The lower limits are x0 >= 0, y0 >= 0, and z0 >= 0.
+//
+// The upper limits are mode dependent, but all the current methods are
+// limited to x1 < 127, y1 < 127, z1 < 255. Note that these are not
+// powers of two; if you want to use power-of-two chunks (to make
+// it efficient to decide which chunk a coordinate falls in), you're
+// limited to at most x1=64, y1=64, z1=128. For classic Minecraft-style
+// worlds with limited vertical extent, I recommend using a single
+// chunk for the entire height, which limits the height to 255 blocks
+// (one less than Minecraft), and only chunk the map in X & Y.
+
+STBVXDEC int stbvox_make_mesh(stbvox_mesh_maker *mm);
+// Call this function to create mesh data for the currently configured
+// set of input data. This appends to the currently configured mesh output
+// buffer. Returns 1 on success. If there is not enough room in the buffer,
+// it outputs as much as it can, and returns 0; you need to switch output
+// buffers (either by calling stbvox_set_buffer to set new buffers, or
+// by copying the data out and calling stbvox_reset_buffers), and then
+// call this function again without changing any of the input parameters.
+//
+// Note that this function appends; you can call it multiple times to
+// build a single mesh. For example, caveview uses chunks that are
+// 32x32x255, but builds the mesh for it by processing 32x32x16 at atime
+// (this is faster as it is reuses the same 34x34x18 input buffers rather
+// than needing 34x34x257 input buffers).
+
+// Once you're done creating a mesh into a given buffer,
+// consider the following functions:
+
+STBVXDEC int stbvox_get_quad_count(stbvox_mesh_maker *mm, int mesh);
+// Returns the number of quads in the mesh currently generated by mm.
+// This is the sum of all consecutive stbvox_make_mesh runs appending
+// to the same buffer. 'mesh' distinguishes between the multiple user
+// meshes available via 'selector' or stbvox_set_default_mesh.
+//
+// Typically you use this function when you're done building the mesh
+// and want to record how to draw it.
+//
+// Note that there are no index buffers; the data stored in the buffers
+// should be drawn as quads (e.g. with GL_QUAD); if your API does not
+// support quads, you can create a single index buffer large enough to
+// draw your largest vertex buffer, and reuse it for every rendering.
+// (Note that if you use 32-bit indices, you'll use 24 bytes of bandwidth
+// per quad, more than the 20 bytes for the vertex/face mesh data.)
+
+STBVXDEC void stbvox_set_mesh_coordinates(stbvox_mesh_maker *mm, int x, int y, int z);
+// Sets the global coordinates for this chunk, such that (0,0,0) relative
+// coordinates will be at (x,y,z) in global coordinates.
+
+STBVXDEC void stbvox_get_bounds(stbvox_mesh_maker *mm, float bounds[2][3]);
+// Returns the bounds for the mesh in global coordinates. Use this
+// for e.g. frustum culling the mesh. @BUG: this just uses the
+// values from stbvox_set_input_range(), so if you build by
+// appending multiple values, this will be wrong, and you need to
+// set stbvox_set_input_range() to the full size. Someday this
+// will switch to tracking the actual bounds of the *mesh*, though.
+
+STBVXDEC void stbvox_get_transform(stbvox_mesh_maker *mm, float transform[3][3]);
+// Returns the 'transform' data for the shader uniforms. It is your
+// job to set this to the shader before drawing the mesh. It is the
+// only uniform that needs to change per-mesh. Note that it is not
+// a 3x3 matrix, but rather a scale to decode fixed point numbers as
+// floats, a translate from relative to global space, and a special
+// translation for texture coordinate generation that avoids
+// floating-point precision issues. @TODO: currently we add the
+// global translation to the vertex, than multiply by modelview,
+// but this means if camera location and vertex are far from the
+// origin, we lose precision. Need to make a special modelview with
+// the translation (or some of it) factored out to avoid this.
+
+STBVXDEC void stbvox_reset_buffers(stbvox_mesh_maker *mm);
+// Call this function if you're done with the current output buffer
+// but want to reuse it (e.g. you're done appending with
+// stbvox_make_mesh and you've copied the data out to your graphics API
+// so can reuse the buffer).
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// RENDERING
+//
+
+STBVXDEC char *stbvox_get_vertex_shader(void);
+// Returns the (currently GLSL-only) vertex shader.
+
+STBVXDEC char *stbvox_get_fragment_shader(void);
+// Returns the (currently GLSL-only) fragment shader.
+// You can override the lighting and fogging calculations
+// by appending data to the end of these; see the #define
+// documentation for more information.
+
+STBVXDEC char *stbvox_get_fragment_shader_alpha_only(void);
+// Returns a slightly cheaper fragment shader that computes
+// alpha but not color. This is useful for e.g. a depth-only
+// pass when using alpha test.
+
+typedef struct stbvox_uniform_info stbvox_uniform_info;
+
+STBVXDEC int stbvox_get_uniform_info(stbvox_uniform_info *info, int uniform);
+// Gets the information about a uniform necessary for you to
+// set up each uniform with a minimal amount of explicit code.
+// See the sample code after the structure definition for stbvox_uniform_info,
+// further down in this header section.
+//
+// "uniform" is from the list immediately following. For many
+// of these, default values are provided which you can set.
+// Most values are shared for most draw calls; e.g. for stateful
+// APIs you can set most of the state only once. Only
+// STBVOX_UNIFORM_transform needs to change per draw call.
+//
+// STBVOX_UNIFORM_texscale
+//    64- or 128-long vec4 array. (128 only if STBVOX_CONFIG_PREFER_TEXBUFFER)
+//    x: scale factor to apply to texture #1. must be a power of two. 1.0 means 'face-sized'
+//    y: scale factor to apply to texture #2. must be a power of two. 1.0 means 'face-sized'
+//    z: blend mode indexed by texture #2. 0.0 is alpha compositing; 1.0 is multiplication.
+//    w: unused currently. @TODO use to support texture animation?
+//
+//    Texscale is indexed by the bottom 6 or 7 bits of the texture id; thus for
+//    example the texture at index 0 in the array and the texture in index 128 of
+//    the array must be scaled the same. This means that if you only have 64 or 128
+//    unique textures, they all get distinct values anyway; otherwise you have
+//    to group them in pairs or sets of four.
+//
+// STBVOX_UNIFORM_ambient
+//    4-long vec4 array:
+//      ambient[0].xyz   - negative of direction of a directional light for half-lambert
+//      ambient[1].rgb   - color of light scaled by NdotL (can be negative)
+//      ambient[2].rgb   - constant light added to above calculation;
+//                         effectively light ranges from ambient[2]-ambient[1] to ambient[2]+ambient[1]
+//      ambient[3].rgb   - fog color for STBVOX_CONFIG_FOG_SMOOTHSTEP
+//      ambient[3].a     - reciprocal of squared distance of farthest fog point (viewing distance)
+
+
+                               //  +----- has a default value
+                               //  |  +-- you should always use the default value
+enum                           //  V  V
+{                              //  ------------------------------------------------
+   STBVOX_UNIFORM_face_data,   //  n      the sampler with the face texture buffer
+   STBVOX_UNIFORM_transform,   //  n      the transform data from stbvox_get_transform
+   STBVOX_UNIFORM_tex_array,   //  n      an array of two texture samplers containing the two texture arrays
+   STBVOX_UNIFORM_texscale,    //  Y      a table of texture properties, see above
+   STBVOX_UNIFORM_color_table, //  Y      64 vec4 RGBA values; a default palette is provided; if A > 1.0, fullbright
+   STBVOX_UNIFORM_normals,     //  Y  Y   table of normals, internal-only
+   STBVOX_UNIFORM_texgen,      //  Y  Y   table of texgen vectors, internal-only
+   STBVOX_UNIFORM_ambient,     //  n      lighting & fog info, see above
+   STBVOX_UNIFORM_camera_pos,  //  Y      camera position in global voxel space (for lighting & fog)
+
+   STBVOX_UNIFORM_count,
+};
+
+enum
+{
+   STBVOX_UNIFORM_TYPE_none,
+   STBVOX_UNIFORM_TYPE_sampler,
+   STBVOX_UNIFORM_TYPE_vec2,
+   STBVOX_UNIFORM_TYPE_vec3,
+   STBVOX_UNIFORM_TYPE_vec4,
+};
+
+struct stbvox_uniform_info
+{
+   int type;                    // which type of uniform
+   int bytes_per_element;       // the size of each uniform array element (e.g. vec3 = 12 bytes)
+   int array_length;            // length of the uniform array
+   char *name;                  // name in the shader @TODO use numeric binding
+   float *default_value;        // if not NULL, you can use this as the uniform pointer
+   int use_tex_buffer;          // if true, then the uniform is a sampler but the data can come from default_value
+};
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Uniform sample code
+//
+
+#if 0
+// Run this once per frame before drawing all the meshes.
+// You still need to separately set the 'transform' uniform for every mesh.
+void setup_uniforms(GLuint shader, float camera_pos[4], GLuint tex1, GLuint tex2)
+{
+   int i;
+   glUseProgram(shader); // so uniform binding works
+   for (i=0; i < STBVOX_UNIFORM_count; ++i) {
+      stbvox_uniform_info sui;
+      if (stbvox_get_uniform_info(&sui, i)) {
+         GLint loc = glGetUniformLocation(shader, sui.name);
+         if (loc != -1) {
+            switch (i) {
+               case STBVOX_UNIFORM_camera_pos: // only needed for fog
+                  glUniform4fv(loc, sui.array_length, camera_pos);
+                  break;
+
+               case STBVOX_UNIFORM_tex_array: {
+                  GLuint tex_unit[2] = { 0, 1 }; // your choice of samplers
+                  glUniform1iv(loc, 2, tex_unit);
+
+                  glActiveTexture(GL_TEXTURE0 + tex_unit[0]); glBindTexture(GL_TEXTURE_2D_ARRAY, tex1);
+                  glActiveTexture(GL_TEXTURE0 + tex_unit[1]); glBindTexture(GL_TEXTURE_2D_ARRAY, tex2);
+                  glActiveTexture(GL_TEXTURE0); // reset to default
+                  break;
+               }
+
+               case STBVOX_UNIFORM_face_data:
+                  glUniform1i(loc, SAMPLER_YOU_WILL_BIND_PER_MESH_FACE_DATA_TO);
+                  break;
+
+               case STBVOX_UNIFORM_ambient:     // you definitely want to override this
+               case STBVOX_UNIFORM_color_table: // you might want to override this
+               case STBVOX_UNIFORM_texscale:    // you may want to override this
+                  glUniform4fv(loc, sui.array_length, sui.default_value);
+                  break;
+
+               case STBVOX_UNIFORM_normals:     // you never want to override this
+               case STBVOX_UNIFORM_texgen:      // you never want to override this
+                  glUniform3fv(loc, sui.array_length, sui.default_value);
+                  break;
+            }
+         }
+      }
+   }
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// INPUT TO MESHING
+//
+
+// Shapes of blocks that aren't always cubes
+enum
+{
+   STBVOX_GEOM_empty,
+   STBVOX_GEOM_knockout,  // creates a hole in the mesh
+   STBVOX_GEOM_solid,
+   STBVOX_GEOM_transp,    // solid geometry, but transparent contents so neighbors generate normally, unless same blocktype
+
+   // following 4 can be represented by vheight as well
+   STBVOX_GEOM_slab_upper,
+   STBVOX_GEOM_slab_lower,
+   STBVOX_GEOM_floor_slope_north_is_top,
+   STBVOX_GEOM_ceil_slope_north_is_bottom,
+
+   STBVOX_GEOM_floor_slope_north_is_top_as_wall_UNIMPLEMENTED,   // same as floor_slope above, but uses wall's texture & texture projection
+   STBVOX_GEOM_ceil_slope_north_is_bottom_as_wall_UNIMPLEMENTED,
+   STBVOX_GEOM_crossed_pair,    // corner-to-corner pairs, with normal vector bumped upwards
+   STBVOX_GEOM_force,           // like GEOM_transp, but faces visible even if neighbor is same type, e.g. minecraft fancy leaves
+
+   // these access vheight input
+   STBVOX_GEOM_floor_vheight_03 = 12,  // diagonal is SW-NE
+   STBVOX_GEOM_floor_vheight_12,       // diagonal is SE-NW
+   STBVOX_GEOM_ceil_vheight_03,
+   STBVOX_GEOM_ceil_vheight_12,
+
+   STBVOX_GEOM_count, // number of geom cases
+};
+
+enum
+{
+   STBVOX_FACE_east,
+   STBVOX_FACE_north,
+   STBVOX_FACE_west,
+   STBVOX_FACE_south,
+   STBVOX_FACE_up,
+   STBVOX_FACE_down,
+
+   STBVOX_FACE_count,
+};
+
+#ifdef STBVOX_CONFIG_BLOCKTYPE_SHORT
+typedef unsigned short stbvox_block_type;
+#else
+typedef unsigned char stbvox_block_type;
+#endif
+
+// 24-bit color
+typedef struct
+{
+   unsigned char r,g,b;
+} stbvox_rgb;
+
+#define STBVOX_COLOR_TEX1_ENABLE   64
+#define STBVOX_COLOR_TEX2_ENABLE  128
+
+// This is the data structure you fill out. Most of the arrays can be
+// NULL, except when one is required to get the value to index another.
+//
+// The compass system used in the following descriptions is:
+//     east means increasing x
+//     north means increasing y
+//     up means increasing z
+struct stbvox_input_description
+{
+   unsigned char lighting_at_vertices;
+   // The default is lighting values (i.e. ambient occlusion) are at block
+   // center, and the vertex light is gathered from those adjacent block
+   // centers that the vertex is facing. This makes smooth lighting
+   // consistent across adjacent faces with the same orientation.
+   //
+   // Setting this flag to non-zero gives you explicit control
+   // of light at each vertex, but now the lighting/ao will be
+   // shared by all vertices at the same point, even if they
+   // have different normals.
+
+   // these are mostly 3D maps you use to define your voxel world, using x_stride and y_stride
+   // note that for cache efficiency, you want to use the block_foo palettes as much as possible instead
+
+   stbvox_rgb *rgb;
+   // Indexed by 3D coordinate.
+   // 24-bit voxel color for STBVOX_CONFIG_MODE = 20 or 21 only
+
+   unsigned char *lighting;
+   // Indexed by 3D coordinate. The lighting value / ambient occlusion
+   // value that is used to define the vertex lighting values.
+   // The raw lighting values are defined at the center of blocks
+   // (or at vertex if 'lighting_at_vertices' is true).
+   //
+   // If the macro STBVOX_CONFIG_ROTATION_IN_LIGHTING is defined,
+   // then an additional 2-bit block rotation value is stored
+   // in this field as well.
+   //
+   // Encode with STBVOX_MAKE_LIGHTING_EXT(lighting,rot)--here
+   // 'lighting' should still be 8 bits, as the macro will
+   // discard the bottom bits automatically. Similarly, if
+   // using STBVOX_CONFIG_VHEIGHT_IN_LIGHTING, encode with
+   // STBVOX_MAKE_LIGHTING_EXT(lighting,vheight).
+   //
+   // (Rationale: rotation needs to be independent of blocktype,
+   // but is only 2 bits so doesn't want to be its own array.
+   // Lighting is the one thing that was likely to already be
+   // in use and that I could easily steal 2 bits from.)
+
+   stbvox_block_type *blocktype;
+   // Indexed by 3D coordinate. This is a core "block type" value, which is used
+   // to index into other arrays; essentially a "palette". This is much more
+   // memory-efficient and performance-friendly than storing the values explicitly,
+   // but only makes sense if the values are always synchronized.
+   //
+   // If a voxel's blocktype is 0, it is assumed to be empty (STBVOX_GEOM_empty),
+   // and no other blocktypes should be STBVOX_GEOM_empty. (Only if you do not
+   // have blocktypes should STBVOX_GEOM_empty ever used.)
+   //
+   // Normally it is an unsigned byte, but you can override it to be
+   // a short if you have too many blocktypes.
+
+   unsigned char *geometry;
+   // Indexed by 3D coordinate. Contains the geometry type for the block.
+   // Also contains a 2-bit rotation for how the whole block is rotated.
+   // Also includes a 2-bit vheight value when using shared vheight values.
+   // See the separate vheight documentation.
+   // Encode with STBVOX_MAKE_GEOMETRY(geom, rot, vheight)
+
+   unsigned char *block_geometry;
+   // Array indexed by blocktype containing the geometry for this block, plus
+   // a 2-bit "simple rotation". Note rotation has limited use since it's not
+   // independent of blocktype.
+   //
+   // Encode with STBVOX_MAKE_GEOMETRY(geom,simple_rot,0)
+
+   unsigned char *block_tex1;
+   // Array indexed by blocktype containing the texture id for texture #1.
+
+   unsigned char (*block_tex1_face)[6];
+   // Array indexed by blocktype and face containing the texture id for texture #1.
+   // The N/E/S/W face choices can be rotated by one of the rotation selectors;
+   // The top & bottom face textures will rotate to match.
+   // Note that it only makes sense to use one of block_tex1 or block_tex1_face;
+   // this pattern repeats throughout and this notice is not repeated.
+
+   unsigned char *tex2;
+   // Indexed by 3D coordinate. Contains the texture id for texture #2
+   // to use on all faces of the block.
+
+   unsigned char *block_tex2;
+   // Array indexed by blocktype containing the texture id for texture #2.
+
+   unsigned char (*block_tex2_face)[6];
+   // Array indexed by blocktype and face containing the texture id for texture #2.
+   // The N/E/S/W face choices can be rotated by one of the rotation selectors;
+   // The top & bottom face textures will rotate to match.
+
+   unsigned char *color;
+   // Indexed by 3D coordinate. Contains the color for all faces of the block.
+   // The core color value is 0..63.
+   // Encode with STBVOX_MAKE_COLOR(color_number, tex1_enable, tex2_enable)
+
+   unsigned char *block_color;
+   // Array indexed by blocktype containing the color value to apply to the faces.
+   // The core color value is 0..63.
+   // Encode with STBVOX_MAKE_COLOR(color_number, tex1_enable, tex2_enable)
+
+   unsigned char (*block_color_face)[6];
+   // Array indexed by blocktype and face containing the color value to apply to that face.
+   // The core color value is 0..63.
+   // Encode with STBVOX_MAKE_COLOR(color_number, tex1_enable, tex2_enable)
+
+   unsigned char *block_texlerp;
+   // Array indexed by blocktype containing 3-bit scalar for texture #2 alpha
+   // (known throughout as 'texlerp'). This is constant over every face even
+   // though the property is potentially per-vertex.
+
+   unsigned char (*block_texlerp_face)[6];
+   // Array indexed by blocktype and face containing 3-bit scalar for texture #2 alpha.
+   // This is constant over the face even though the property is potentially per-vertex.
+
+   unsigned char *block_vheight;
+   // Array indexed by blocktype containing the vheight values for the
+   // top or bottom face of this block. These will rotate properly if the
+   // block is rotated. See discussion of vheight.
+   // Encode with STBVOX_MAKE_VHEIGHT(sw_height, se_height, nw_height, ne_height)
+
+   unsigned char *selector;
+   // Array indexed by 3D coordinates indicating which output mesh to select.
+
+   unsigned char *block_selector;
+   // Array indexed by blocktype indicating which output mesh to select.
+
+   unsigned char *side_texrot;
+   // Array indexed by 3D coordinates encoding 2-bit texture rotations for the
+   // faces on the E/N/W/S sides of the block.
+   // Encode with STBVOX_MAKE_SIDE_TEXROT(rot_e, rot_n, rot_w, rot_s)
+
+   unsigned char *block_side_texrot;
+   // Array indexed by blocktype encoding 2-bit texture rotations for the faces
+   // on the E/N/W/S sides of the block.
+   // Encode with STBVOX_MAKE_SIDE_TEXROT(rot_e, rot_n, rot_w, rot_s)
+
+   unsigned char *overlay;                 // index into palettes listed below
+   // Indexed by 3D coordinate. If 0, there is no overlay. If non-zero,
+   // it indexes into to the below arrays and overrides the values
+   // defined by the blocktype.
+
+   unsigned char (*overlay_tex1)[6];
+   // Array indexed by overlay value and face, containing an override value
+   // for the texture id for texture #1. If 0, the value defined by blocktype
+   // is used.
+
+   unsigned char (*overlay_tex2)[6];
+   // Array indexed by overlay value and face, containing an override value
+   // for the texture id for texture #2. If 0, the value defined by blocktype
+   // is used.
+
+   unsigned char (*overlay_color)[6];
+   // Array indexed by overlay value and face, containing an override value
+   // for the face color. If 0, the value defined by blocktype is used.
+
+   unsigned char *overlay_side_texrot;
+   // Array indexed by overlay value, encoding 2-bit texture rotations for the faces
+   // on the E/N/W/S sides of the block.
+   // Encode with STBVOX_MAKE_SIDE_TEXROT(rot_e, rot_n, rot_w, rot_s)
+
+   unsigned char *rotate;
+   // Indexed by 3D coordinate. Allows independent rotation of several
+   // parts of the voxel, where by rotation I mean swapping textures
+   // and colors between E/N/S/W faces.
+   //    Block: rotates anything indexed by blocktype
+   //    Overlay: rotates anything indexed by overlay
+   //    EColor: rotates faces defined in ecolor_facemask
+   // Encode with STBVOX_MAKE_MATROT(block,overlay,ecolor)
+
+   unsigned char *tex2_for_tex1;
+   // Array indexed by tex1 containing the texture id for texture #2.
+   // You can use this if the two are always/almost-always strictly
+   // correlated (e.g. if tex2 is a detail texture for tex1), as it
+   // will be more efficient (touching fewer cache lines) than using
+   // e.g. block_tex2_face.
+
+   unsigned char *tex2_replace;
+   // Indexed by 3D coordinate. Specifies the texture id for texture #2
+   // to use on a single face of the voxel, which must be E/N/W/S (not U/D).
+   // The texture id is limited to 6 bits unless tex2_facemask is also
+   // defined (see below).
+   // Encode with STBVOX_MAKE_TEX2_REPLACE(tex2, face)
+
+   unsigned char *tex2_facemask;
+   // Indexed by 3D coordinate. Specifies which of the six faces should
+   // have their tex2 replaced by the value of tex2_replace. In this
+   // case, all 8 bits of tex2_replace are used as the texture id.
+   // Encode with STBVOX_MAKE_FACE_MASK(east,north,west,south,up,down)
+
+   unsigned char *extended_color;
+   // Indexed by 3D coordinate. Specifies a value that indexes into
+   // the ecolor arrays below (both of which must be defined).
+
+   unsigned char *ecolor_color;
+   // Indexed by extended_color value, specifies an optional override
+   // for the color value on some faces.
+   // Encode with STBVOX_MAKE_COLOR(color_number, tex1_enable, tex2_enable)
+
+   unsigned char *ecolor_facemask;
+   // Indexed by extended_color value, this specifies which faces the
+   // color in ecolor_color should be applied to. The faces can be
+   // independently rotated by the ecolor value of 'rotate', if it exists.
+   // Encode with STBVOX_MAKE_FACE_MASK(e,n,w,s,u,d)
+
+   unsigned char *color2;
+   // Indexed by 3D coordinates, specifies an alternative color to apply
+   // to some of the faces of the block.
+   // Encode with STBVOX_MAKE_COLOR(color_number, tex1_enable, tex2_enable)
+
+   unsigned char *color2_facemask;
+   // Indexed by 3D coordinates, specifies which faces should use the
+   // color defined in color2. No rotation value is applied.
+   // Encode with STBVOX_MAKE_FACE_MASK(e,n,w,s,u,d)
+
+   unsigned char *color3;
+   // Indexed by 3D coordinates, specifies an alternative color to apply
+   // to some of the faces of the block.
+   // Encode with STBVOX_MAKE_COLOR(color_number, tex1_enable, tex2_enable)
+
+   unsigned char *color3_facemask;
+   // Indexed by 3D coordinates, specifies which faces should use the
+   // color defined in color3. No rotation value is applied.
+   // Encode with STBVOX_MAKE_FACE_MASK(e,n,w,s,u,d)
+
+   unsigned char *texlerp_simple;
+   // Indexed by 3D coordinates, this is the smallest texlerp encoding
+   // that can do useful work. It consits of three values: baselerp,
+   // vertlerp, and face_vertlerp. Baselerp defines the value
+   // to use on all of the faces but one, from the STBVOX_TEXLERP_BASE
+   // values. face_vertlerp is one of the 6 face values (or STBVOX_FACE_NONE)
+   // which specifies the face should use the vertlerp values.
+   // Vertlerp defines a lerp value at every vertex of the mesh.
+   // Thus, one face can have per-vertex texlerp values, and those
+   // values are encoded in the space so that they will be shared
+   // by adjacent faces that also use vertlerp, allowing continuity
+   // (this is used for the "texture crossfade" bit of the release video).
+   // Encode with STBVOX_MAKE_TEXLERP_SIMPLE(baselerp, vertlerp, face_vertlerp)
+
+   // The following texlerp encodings are experimental and maybe not
+   // that useful.
+
+   unsigned char *texlerp;
+   // Indexed by 3D coordinates, this defines four values:
+   //   vertlerp is a lerp value at every vertex of the mesh (using STBVOX_TEXLERP_BASE values).
+   //   ud is the value to use on up and down faces, from STBVOX_TEXLERP_FACE values
+   //   ew is the value to use on east and west faces, from STBVOX_TEXLERP_FACE values
+   //   ns is the value to use on north and south faces, from STBVOX_TEXLERP_FACE values
+   // If any of ud, ew, or ns is STBVOX_TEXLERP_FACE_use_vert, then the
+   // vertlerp values for the vertices are gathered and used for those faces.
+   // Encode with STBVOX_MAKE_TEXLERP(vertlerp,ud,ew,sw)
+
+   unsigned short *texlerp_vert3;
+   // Indexed by 3D coordinates, this works with texlerp and
+   // provides a unique texlerp value for every direction at
+   // every vertex. The same rules of whether faces share values
+   // applies. The STBVOX_TEXLERP_FACE vertlerp value defined in
+   // texlerp is only used for the down direction. The values at
+   // each vertex in other directions are defined in this array,
+   // and each uses the STBVOX_TEXLERP3 values (i.e. full precision
+   // 3-bit texlerp values).
+   // Encode with STBVOX_MAKE_VERT3(vertlerp_e,vertlerp_n,vertlerp_w,vertlerp_s,vertlerp_u)
+
+   unsigned short *texlerp_face3;          // e:3,n:3,w:3,s:3,u:2,d:2
+   // Indexed by 3D coordinates, this provides a compact way to
+   // fully specify the texlerp value indepenendly for every face,
+   // but doesn't allow per-vertex variation. E/N/W/S values are
+   // encoded using STBVOX_TEXLERP3 values, whereas up and down
+   // use STBVOX_TEXLERP_SIMPLE values.
+   // Encode with STBVOX_MAKE_FACE3(face_e,face_n,face_w,face_s,face_u,face_d)
+
+   unsigned char *vheight;                 // STBVOX_MAKE_VHEIGHT   -- sw:2, se:2, nw:2, ne:2, doesn't rotate
+   // Indexed by 3D coordinates, this defines the four
+   // vheight values to use if the geometry is STBVOX_GEOM_vheight*.
+   // See the vheight discussion.
+
+   unsigned char *packed_compact;
+   // Stores block rotation, vheight, and texlerp values:
+   //    block rotation: 2 bits
+   //    vertex vheight: 2 bits
+   //    use_texlerp   : 1 bit
+   //    vertex texlerp: 3 bits
+   // If STBVOX_CONFIG_UP_TEXLERP_PACKED is defined, then 'vertex texlerp' is
+   // used for up faces if use_texlerp is 1. If STBVOX_CONFIG_DOWN_TEXLERP_PACKED
+   // is defined, then 'vertex texlerp' is used for down faces if use_texlerp is 1.
+   // Note if those symbols are defined but packed_compact is NULL, the normal
+   // texlerp default will be used.
+   // Encode with STBVOX_MAKE_PACKED_COMPACT(rot, vheight, texlerp, use_texlerp)
+};
+// @OPTIMIZE allow specializing; build a single struct with all of the
+// 3D-indexed arrays combined so it's AoS instead of SoA for better
+// cache efficiency
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//  VHEIGHT DOCUMENTATION
+//
+//  "vheight" is the internal name for the special block types
+//  with sloped tops or bottoms. "vheight" stands for "vertex height".
+//
+//  Note that these blocks are very flexible (there are 256 of them,
+//  although at least 17 of them should never be used), but they
+//  also have a disadvantage that they generate extra invisible
+//  faces; the generator does not currently detect whether adjacent
+//  vheight blocks hide each others sides, so those side faces are
+//  always generated. For a continuous ground terrain, this means
+//  that you may generate 5x as many quads as needed. See notes
+//  on "improvements for shipping products" in the introduction.
+
+enum
+{
+   STBVOX_VERTEX_HEIGHT_0,
+   STBVOX_VERTEX_HEIGHT_half,
+   STBVOX_VERTEX_HEIGHT_1,
+   STBVOX_VERTEX_HEIGHT_one_and_a_half,
+};
+// These are the "vheight" values. Vheight stands for "vertex height".
+// The idea is that for a "floor vheight" block, you take a cube and
+// reposition the top-most vertices at various heights as specified by
+// the vheight values. Similarly, a "ceiling vheight" block takes a
+// cube and repositions the bottom-most vertices.
+//
+// A floor block only adjusts the top four vertices; the bottom four vertices
+// remain at the bottom of the block. The height values are 2 bits,
+// measured in halves of a block; so you can specify heights of 0/2,
+// 1/2, 2/2, or 3/2. 0 is the bottom of the block, 1 is halfway
+// up the block, 2 is the top of the block, and 3 is halfway up the
+// next block (and actually outside of the block). The value 3 is
+// actually legal for floor vheight (but not ceiling), and allows you to:
+//
+//     (A) have smoother terrain by having slopes that cross blocks,
+//         e.g. (1,1,3,3) is a regular-seeming slope halfway between blocks
+//     (B) make slopes steeper than 45-degrees, e.g. (0,0,3,3)
+//
+// (Because only z coordinates have half-block precision, and x&y are
+// limited to block corner precision, it's not possible to make these
+// things "properly" out of blocks, e.g. a half-slope block on its side
+// or a sloped block halfway between blocks that's made out of two blocks.)
+//
+// If you define STBVOX_CONFIG_OPTIMIZED_VHEIGHT, then the top face
+// (or bottom face for a ceiling vheight block) will be drawn as a
+// single quad even if the four vertex heights aren't planar, and a
+// single normal will be used over the entire quad. If you
+// don't define it, then if the top face is non-planar, it will be
+// split into two triangles, each with their own normal/lighting.
+// (Note that since all output from stb_voxel_render is quad meshes,
+// triangles are actually rendered as degenerate quads.) In this case,
+// the distinction between STBVOX_GEOM_floor_vheight_03 and
+// STBVOX_GEOM_floor_vheight_12 comes into play; the former introduces
+// an edge from the SW to NE corner (i.e. from <0,0,?> to <1,1,?>),
+// while the latter introduces an edge from the NW to SE corner
+// (i.e. from <0,1,?> to <1,0,?>.) For a "lazy mesh" look, use
+// exclusively _03 or _12. For a "classic mesh" look, alternate
+// _03 and _12 in a checkerboard pattern. For a "smoothest surface"
+// look, choose the edge based on actual vertex heights.
+//
+// The four vertex heights can come from several places. The simplest
+// encoding is to just use the 'vheight' parameter which stores four
+// explicit vertex heights for every block. This allows total independence,
+// but at the cost of the largest memory usage, 1 byte per 3D block.
+// Encode this with STBVOX_MAKE_VHEIGHT(vh_sw, vh_se, vh_nw, vh_ne).
+// These coordinates are absolute, not affected by block rotations.
+//
+// An alternative if you just want to encode some very specific block
+// types, not all the possibilities--say you just want half-height slopes,
+// so you want (0,0,1,1) and (1,1,2,2)--then you can use block_vheight
+// to specify them. The geometry rotation will cause block_vheight values
+// to be rotated (because it's as if you're just defining a type of
+// block). This value is also encoded with STBVOX_MAKE_VHEIGHT.
+//
+// If you want to save memory and you're creating a "continuous ground"
+// sort of effect, you can make each vertex of the lattice share the
+// vheight value; that is, two adjacent blocks that share a vertex will
+// always get the same vheight value for that vertex. Then you need to
+// store two bits of vheight for every block, which you do by storing it
+// as part another data structure. Store the south-west vertex's vheight
+// with the block. You can either use the "geometry" mesh variable (it's
+// a parameter to STBVOX_MAKE_GEOMETRY) or you can store it in the
+// "lighting" mesh variable if you defined STBVOX_CONFIG_VHEIGHT_IN_LIGHTING,
+// using STBVOX_MAKE_LIGHTING_EXT(lighting,vheight).
+//
+// Note that if you start with a 2D height map and generate vheight data from
+// it, you don't necessarily store only one value per (x,y) coordinate,
+// as the same value may need to be set up at multiple z heights. For
+// example, if height(8,8) = 13.5, then you want the block at (8,8,13)
+// to store STBVOX_VERTEX_HEIGHT_half, and this will be used by blocks
+// at (7,7,13), (8,7,13), (7,8,13), and (8,8,13). However, if you're
+// allowing steep slopes, it might be the case that you have a block
+// at (7,7,12) which is supposed to stick up to 13.5; that means
+// you also need to store STBVOX_VERTEX_HEIGHT_one_and_a_half at (8,8,12).
+
+enum
+{
+   STBVOX_TEXLERP_FACE_0,
+   STBVOX_TEXLERP_FACE_half,
+   STBVOX_TEXLERP_FACE_1,
+   STBVOX_TEXLERP_FACE_use_vert,
+};
+
+enum
+{
+   STBVOX_TEXLERP_BASE_0,    // 0.0
+   STBVOX_TEXLERP_BASE_2_7,  // 2/7
+   STBVOX_TEXLERP_BASE_5_7,  // 4/7
+   STBVOX_TEXLERP_BASE_1     // 1.0
+};
+
+enum
+{
+   STBVOX_TEXLERP3_0_8,
+   STBVOX_TEXLERP3_1_8,
+   STBVOX_TEXLERP3_2_8,
+   STBVOX_TEXLERP3_3_8,
+   STBVOX_TEXLERP3_4_8,
+   STBVOX_TEXLERP3_5_8,
+   STBVOX_TEXLERP3_6_8,
+   STBVOX_TEXLERP3_7_8,
+};
+
+#define STBVOX_FACE_NONE  7
+
+#define STBVOX_BLOCKTYPE_EMPTY    0
+
+#ifdef STBVOX_BLOCKTYPE_SHORT
+#define STBVOX_BLOCKTYPE_HOLE  65535
+#else
+#define STBVOX_BLOCKTYPE_HOLE    255
+#endif
+
+#define STBVOX_MAKE_GEOMETRY(geom, rotate, vheight) ((geom) + (rotate)*16 + (vheight)*64)
+#define STBVOX_MAKE_VHEIGHT(v_sw, v_se, v_nw, v_ne) ((v_sw) + (v_se)*4 + (v_nw)*16 + (v_ne)*64)
+#define STBVOX_MAKE_MATROT(block, overlay, color)  ((block) + (overlay)*4 + (color)*64)
+#define STBVOX_MAKE_TEX2_REPLACE(tex2, tex2_replace_face) ((tex2) + ((tex2_replace_face) & 3)*64)
+#define STBVOX_MAKE_TEXLERP(ns2, ew2, ud2, vert)  ((ew2) + (ns2)*4 + (ud2)*16 + (vert)*64)
+#define STBVOX_MAKE_TEXLERP_SIMPLE(baselerp,vert,face)   ((vert)*32 + (face)*4 + (baselerp))
+#define STBVOX_MAKE_TEXLERP1(vert,e2,n2,w2,s2,u4,d2) STBVOX_MAKE_TEXLERP(s2, w2, d2, vert)
+#define STBVOX_MAKE_TEXLERP2(vert,e2,n2,w2,s2,u4,d2) ((u2)*16 + (n2)*4 + (s2))
+#define STBVOX_MAKE_FACE_MASK(e,n,w,s,u,d)  ((e)+(n)*2+(w)*4+(s)*8+(u)*16+(d)*32)
+#define STBVOX_MAKE_SIDE_TEXROT(e,n,w,s) ((e)+(n)*4+(w)*16+(s)*64)
+#define STBVOX_MAKE_COLOR(color,t1,t2) ((color)+(t1)*64+(t2)*128)
+#define STBVOX_MAKE_TEXLERP_VERT3(e,n,w,s,u)   ((e)+(n)*8+(w)*64+(s)*512+(u)*4096)
+#define STBVOX_MAKE_TEXLERP_FACE3(e,n,w,s,u,d) ((e)+(n)*8+(w)*64+(s)*512+(u)*4096+(d)*16384)
+#define STBVOX_MAKE_PACKED_COMPACT(rot, vheight, texlerp, def) ((rot)+4*(vheight)+16*(use)+32*(texlerp))
+
+#define STBVOX_MAKE_LIGHTING_EXT(lighting, rot)  (((lighting)&~3)+(rot))
+#define STBVOX_MAKE_LIGHTING(lighting)       (lighting)
+
+#ifndef STBVOX_MAX_MESHES
+#define STBVOX_MAX_MESHES      2           // opaque & transparent
+#endif
+
+#define STBVOX_MAX_MESH_SLOTS  3           // one vertex & two faces, or two vertex and one face
+
+
+// don't mess with this directly, it's just here so you can
+// declare stbvox_mesh_maker on the stack or as a global
+struct stbvox_mesh_maker
+{
+   stbvox_input_description input;
+   int cur_x, cur_y, cur_z;       // last unprocessed voxel if it splits into multiple buffers
+   int x0,y0,z0,x1,y1,z1;
+   int x_stride_in_bytes;
+   int y_stride_in_bytes;
+   int config_dirty;
+   int default_mesh;
+   unsigned int tags;
+
+   int cube_vertex_offset[6][4]; // this allows access per-vertex data stored block-centered (like texlerp, ambient)
+   int vertex_gather_offset[6][4];
+
+   int pos_x,pos_y,pos_z;
+   int full;
+
+   // computed from user input
+   char *output_cur   [STBVOX_MAX_MESHES][STBVOX_MAX_MESH_SLOTS];
+   char *output_end   [STBVOX_MAX_MESHES][STBVOX_MAX_MESH_SLOTS];
+   char *output_buffer[STBVOX_MAX_MESHES][STBVOX_MAX_MESH_SLOTS];
+   int   output_len   [STBVOX_MAX_MESHES][STBVOX_MAX_MESH_SLOTS];
+
+   // computed from config
+   int   output_size  [STBVOX_MAX_MESHES][STBVOX_MAX_MESH_SLOTS]; // per quad
+   int   output_step  [STBVOX_MAX_MESHES][STBVOX_MAX_MESH_SLOTS]; // per vertex or per face, depending
+   int   num_mesh_slots;
+
+   float default_tex_scale[128][2];
+};
+
+#endif //  INCLUDE_STB_VOXEL_RENDER_H
+
+
+#ifdef STB_VOXEL_RENDER_IMPLEMENTATION
+
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h> // memset
+
+// have to use our own names to avoid the _MSC_VER path having conflicting type names
+#ifndef _MSC_VER
+   #include <stdint.h>
+   typedef uint16_t stbvox_uint16;
+   typedef uint32_t stbvox_uint32;
+#else
+   typedef unsigned short stbvox_uint16;
+   typedef unsigned int   stbvox_uint32;
+#endif
+
+#ifdef _MSC_VER
+   #define STBVOX_NOTUSED(v)  (void)(v)
+#else
+   #define STBVOX_NOTUSED(v)  (void)sizeof(v)
+#endif
+
+
+
+#ifndef STBVOX_CONFIG_MODE
+#error "Must defined STBVOX_CONFIG_MODE to select the mode"
+#endif
+
+#if defined(STBVOX_CONFIG_ROTATION_IN_LIGHTING) && defined(STBVOX_CONFIG_VHEIGHT_IN_LIGHTING)
+#error "Can't store both rotation and vheight in lighting"
+#endif
+
+
+// The following are candidate voxel modes. Only modes 0, 1, and 20, and 21 are
+// currently implemented. Reducing the storage-per-quad further
+// shouldn't improve performance, although obviously it allow you
+// to create larger worlds without streaming.
+//
+//
+//                      -----------  Two textures -----------       -- One texture --     ---- Color only ----
+//            Mode:     0     1     2     3     4     5     6        10    11    12      20    21    22    23    24
+// ============================================================================================================
+//  uses Tex Buffer     n     Y     Y     Y     Y     Y     Y         Y     Y     Y       n     Y     Y     Y     Y
+//   bytes per quad    32    20    14    12    10     6     6         8     8     4      32    20    10     6     4
+//       non-blocks   all   all   some  some  some slabs stairs     some  some  none    all   all  slabs slabs  none
+//             tex1   256   256   256   256   256   256   256       256   256   256       n     n     n     n     n
+//             tex2   256   256   256   256   256   256   128         n     n     n       n     n     n     n     n
+//           colors    64    64    64    64    64    64    64         8     n     n     2^24  2^24  2^24  2^24  256
+//        vertex ao     Y     Y     Y     Y     Y     n     n         Y     Y     n       Y     Y     Y     n     n
+//   vertex texlerp     Y     Y     Y     n     n     n     n         -     -     -       -     -     -     -     -
+//      x&y extents   127   127   128    64    64   128    64        64   128   128     127   127   128   128   128
+//        z extents   255   255   128    64?   64?   64    64        32    64   128     255   255   128    64   128
+
+// not sure why I only wrote down the above "result data" and didn't preserve
+// the vertex formats, but here I've tried to reconstruct the designs...
+//     mode # 3 is wrong, one byte too large, but they may have been an error originally
+
+//            Mode:     0     1     2     3     4     5     6        10    11    12      20    21    22    23    24
+// =============================================================================================================
+//   bytes per quad    32    20    14    12    10     6     6         8     8     4            20    10     6     4
+//
+//    vertex x bits     7     7     0     6     0     0     0         0     0     0             7     0     0     0
+//    vertex y bits     7     7     0     0     0     0     0         0     0     0             7     0     0     0
+//    vertex z bits     9     9     7     4     2     0     0         2     2     0             9     2     0     0
+//   vertex ao bits     6     6     6     6     6     0     0         6     6     0             6     6     0     0
+//  vertex txl bits     3     3     3     0     0     0     0         0     0     0            (3)    0     0     0
+//
+//   face tex1 bits    (8)    8     8     8     8     8     8         8     8     8
+//   face tex2 bits    (8)    8     8     8     8     8     7         -     -     -
+//  face color bits    (8)    8     8     8     8     8     8         3     0     0            24    24    24     8
+// face normal bits    (8)    8     8     8     6     4     7         4     4     3             8     3     4     3
+//      face x bits                 7     0     6     7     6         6     7     7             0     7     7     7
+//      face y bits                 7     6     6     7     6         6     7     7             0     7     7     7
+//      face z bits                 2     2     6     6     6         5     6     7             0     7     6     7
+
+
+#if STBVOX_CONFIG_MODE==0 || STBVOX_CONFIG_MODE==1
+
+   #define STBVOX_ICONFIG_VERTEX_32
+   #define STBVOX_ICONFIG_FACE1_1
+
+#elif STBVOX_CONFIG_MODE==20 || STBVOX_CONFIG_MODE==21
+
+   #define STBVOX_ICONFIG_VERTEX_32
+   #define STBVOX_ICONFIG_FACE1_1
+   #define STBVOX_ICONFIG_UNTEXTURED
+
+#else
+#error "Selected value of STBVOX_CONFIG_MODE is not supported"
+#endif
+
+#if STBVOX_CONFIG_MODE==0 || STBVOX_CONFIG_MODE==20
+#define STBVOX_ICONFIG_FACE_ATTRIBUTE
+#endif
+
+#ifndef STBVOX_CONFIG_HLSL
+// the fallback if all others are exhausted is GLSL
+#define STBVOX_ICONFIG_GLSL
+#endif
+
+#ifdef STBVOX_CONFIG_OPENGL_MODELVIEW
+#define STBVOX_ICONFIG_OPENGL_3_1_COMPATIBILITY
+#endif
+
+#if defined(STBVOX_ICONFIG_VERTEX_32)
+   typedef stbvox_uint32 stbvox_mesh_vertex;
+   #define stbvox_vertex_encode(x,y,z,ao,texlerp) \
+      ((stbvox_uint32) ((x)+((y)<<7)+((z)<<14)+((ao)<<23)+((texlerp)<<29)))
+#elif defined(STBVOX_ICONFIG_VERTEX_16_1)  // mode=2
+   typedef stbvox_uint16 stbvox_mesh_vertex;
+   #define stbvox_vertex_encode(x,y,z,ao,texlerp) \
+      ((stbvox_uint16) ((z)+((ao)<<7)+((texlerp)<<13)
+#elif defined(STBVOX_ICONFIG_VERTEX_16_2)  // mode=3
+   typedef stbvox_uint16 stbvox_mesh_vertex;
+   #define stbvox_vertex_encode(x,y,z,ao,texlerp) \
+      ((stbvox_uint16) ((x)+((z)<<6))+((ao)<<10))
+#elif defined(STBVOX_ICONFIG_VERTEX_8)
+   typedef stbvox_uint8 stbvox_mesh_vertex;
+   #define stbvox_vertex_encode(x,y,z,ao,texlerp) \
+      ((stbvox_uint8) ((z)+((ao)<<6))
+#else
+   #error "internal error, no vertex type"
+#endif
+
+#ifdef STBVOX_ICONFIG_FACE1_1
+   typedef struct
+   {
+      unsigned char tex1,tex2,color,face_info;
+   } stbvox_mesh_face;
+#else
+   #error "internal error, no face type"
+#endif
+
+
+// 20-byte quad format:
+//
+// per vertex:
+//
+//     x:7
+//     y:7
+//     z:9
+//     ao:6
+//     tex_lerp:3
+//
+// per face:
+//
+//     tex1:8
+//     tex2:8
+//     face:8
+//     color:8
+
+
+// Faces:
+//
+// Faces use the bottom 3 bits to choose the texgen
+// mode, and all the bits to choose the normal.
+// Thus the bottom 3 bits have to be:
+//      e, n, w, s, u, d, u, d
+//
+// These use compact names so tables are readable
+
+enum
+{
+   STBVF_e,
+   STBVF_n,
+   STBVF_w,
+   STBVF_s,
+   STBVF_u,
+   STBVF_d,
+   STBVF_eu,
+   STBVF_ed,
+
+   STBVF_eu_wall,
+   STBVF_nu_wall,
+   STBVF_wu_wall,
+   STBVF_su_wall,
+   STBVF_ne_u,
+   STBVF_ne_d,
+   STBVF_nu,
+   STBVF_nd,
+
+   STBVF_ed_wall,
+   STBVF_nd_wall,
+   STBVF_wd_wall,
+   STBVF_sd_wall,
+   STBVF_nw_u,
+   STBVF_nw_d,
+   STBVF_wu,
+   STBVF_wd,
+
+   STBVF_ne_u_cross,
+   STBVF_nw_u_cross,
+   STBVF_sw_u_cross,
+   STBVF_se_u_cross,
+   STBVF_sw_u,
+   STBVF_sw_d,
+   STBVF_su,
+   STBVF_sd,
+
+   // @TODO we need more than 5 bits to encode the normal to fit the following
+   // so for now we use the right projection but the wrong normal
+   STBVF_se_u = STBVF_su,
+   STBVF_se_d = STBVF_sd,
+
+   STBVF_count,
+};
+
+/////////////////////////////////////////////////////////////////////////////
+//
+//    tables -- i'd prefer if these were at the end of the file, but: C++
+//
+
+static float stbvox_default_texgen[2][32][3] =
+{
+   { {  0, 1,0 }, { 0, 0, 1 }, {  0,-1,0 }, { 0, 0,-1 },
+     { -1, 0,0 }, { 0, 0, 1 }, {  1, 0,0 }, { 0, 0,-1 },
+     {  0,-1,0 }, { 0, 0, 1 }, {  0, 1,0 }, { 0, 0,-1 },
+     {  1, 0,0 }, { 0, 0, 1 }, { -1, 0,0 }, { 0, 0,-1 },
+
+     {  1, 0,0 }, { 0, 1, 0 }, { -1, 0,0 }, { 0,-1, 0 },
+     { -1, 0,0 }, { 0,-1, 0 }, {  1, 0,0 }, { 0, 1, 0 },
+     {  1, 0,0 }, { 0, 1, 0 }, { -1, 0,0 }, { 0,-1, 0 },
+     { -1, 0,0 }, { 0,-1, 0 }, {  1, 0,0 }, { 0, 1, 0 },
+   },
+   { { 0, 0,-1 }, {  0, 1,0 }, { 0, 0, 1 }, {  0,-1,0 },
+     { 0, 0,-1 }, { -1, 0,0 }, { 0, 0, 1 }, {  1, 0,0 },
+     { 0, 0,-1 }, {  0,-1,0 }, { 0, 0, 1 }, {  0, 1,0 },
+     { 0, 0,-1 }, {  1, 0,0 }, { 0, 0, 1 }, { -1, 0,0 },
+
+     { 0,-1, 0 }, {  1, 0,0 }, { 0, 1, 0 }, { -1, 0,0 },
+     { 0, 1, 0 }, { -1, 0,0 }, { 0,-1, 0 }, {  1, 0,0 },
+     { 0,-1, 0 }, {  1, 0,0 }, { 0, 1, 0 }, { -1, 0,0 },
+     { 0, 1, 0 }, { -1, 0,0 }, { 0,-1, 0 }, {  1, 0,0 },
+   },
+};
+
+#define STBVOX_RSQRT2   0.7071067811865f
+#define STBVOX_RSQRT3   0.5773502691896f
+
+static float stbvox_default_normals[32][3] =
+{
+   { 1,0,0 },  // east
+   { 0,1,0 },  // north
+   { -1,0,0 }, // west
+   { 0,-1,0 }, // south
+   { 0,0,1 },  // up
+   { 0,0,-1 }, // down
+   {  STBVOX_RSQRT2,0, STBVOX_RSQRT2 }, // east & up
+   {  STBVOX_RSQRT2,0, -STBVOX_RSQRT2 }, // east & down
+
+   {  STBVOX_RSQRT2,0, STBVOX_RSQRT2 }, // east & up
+   { 0, STBVOX_RSQRT2, STBVOX_RSQRT2 }, // north & up
+   { -STBVOX_RSQRT2,0, STBVOX_RSQRT2 }, // west & up
+   { 0,-STBVOX_RSQRT2, STBVOX_RSQRT2 }, // south & up
+   {  STBVOX_RSQRT3, STBVOX_RSQRT3, STBVOX_RSQRT3 }, // ne & up
+   {  STBVOX_RSQRT3, STBVOX_RSQRT3,-STBVOX_RSQRT3 }, // ne & down
+   { 0, STBVOX_RSQRT2, STBVOX_RSQRT2 }, // north & up
+   { 0, STBVOX_RSQRT2, -STBVOX_RSQRT2 }, // north & down
+
+   {  STBVOX_RSQRT2,0, -STBVOX_RSQRT2 }, // east & down
+   { 0, STBVOX_RSQRT2, -STBVOX_RSQRT2 }, // north & down
+   { -STBVOX_RSQRT2,0, -STBVOX_RSQRT2 }, // west & down
+   { 0,-STBVOX_RSQRT2, -STBVOX_RSQRT2 }, // south & down
+   { -STBVOX_RSQRT3, STBVOX_RSQRT3, STBVOX_RSQRT3 }, // NW & up
+   { -STBVOX_RSQRT3, STBVOX_RSQRT3,-STBVOX_RSQRT3 }, // NW & down
+   { -STBVOX_RSQRT2,0, STBVOX_RSQRT2 }, // west & up
+   { -STBVOX_RSQRT2,0, -STBVOX_RSQRT2 }, // west & down
+
+   {  STBVOX_RSQRT3, STBVOX_RSQRT3,STBVOX_RSQRT3 }, // NE & up crossed
+   { -STBVOX_RSQRT3, STBVOX_RSQRT3,STBVOX_RSQRT3 }, // NW & up crossed
+   { -STBVOX_RSQRT3,-STBVOX_RSQRT3,STBVOX_RSQRT3 }, // SW & up crossed
+   {  STBVOX_RSQRT3,-STBVOX_RSQRT3,STBVOX_RSQRT3 }, // SE & up crossed
+   { -STBVOX_RSQRT3,-STBVOX_RSQRT3, STBVOX_RSQRT3 }, // SW & up
+   { -STBVOX_RSQRT3,-STBVOX_RSQRT3,-STBVOX_RSQRT3 }, // SW & up
+   { 0,-STBVOX_RSQRT2, STBVOX_RSQRT2 }, // south & up
+   { 0,-STBVOX_RSQRT2, -STBVOX_RSQRT2 }, // south & down
+};
+
+static float stbvox_default_texscale[128][4] =
+{
+   {1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},
+   {1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},
+   {1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},
+   {1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},
+   {1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},
+   {1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},
+   {1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},
+   {1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},
+   {1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},
+   {1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},
+   {1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},
+   {1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},
+   {1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},
+   {1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},
+   {1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},
+   {1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},{1,1,0,0},
+};
+
+static unsigned char stbvox_default_palette_compact[64][3] =
+{
+   { 255,255,255 }, { 238,238,238 }, { 221,221,221 }, { 204,204,204 },
+   { 187,187,187 }, { 170,170,170 }, { 153,153,153 }, { 136,136,136 },
+   { 119,119,119 }, { 102,102,102 }, {  85, 85, 85 }, {  68, 68, 68 },
+   {  51, 51, 51 }, {  34, 34, 34 }, {  17, 17, 17 }, {   0,  0,  0 },
+   { 255,240,240 }, { 255,220,220 }, { 255,160,160 }, { 255, 32, 32 },
+   { 200,120,160 }, { 200, 60,150 }, { 220,100,130 }, { 255,  0,128 },
+   { 240,240,255 }, { 220,220,255 }, { 160,160,255 }, {  32, 32,255 },
+   { 120,160,200 }, {  60,150,200 }, { 100,130,220 }, {   0,128,255 },
+   { 240,255,240 }, { 220,255,220 }, { 160,255,160 }, {  32,255, 32 },
+   { 160,200,120 }, { 150,200, 60 }, { 130,220,100 }, { 128,255,  0 },
+   { 255,255,240 }, { 255,255,220 }, { 220,220,180 }, { 255,255, 32 },
+   { 200,160,120 }, { 200,150, 60 }, { 220,130,100 }, { 255,128,  0 },
+   { 255,240,255 }, { 255,220,255 }, { 220,180,220 }, { 255, 32,255 },
+   { 160,120,200 }, { 150, 60,200 }, { 130,100,220 }, { 128,  0,255 },
+   { 240,255,255 }, { 220,255,255 }, { 180,220,220 }, {  32,255,255 },
+   { 120,200,160 }, {  60,200,150 }, { 100,220,130 }, {   0,255,128 },
+};
+
+static float stbvox_default_ambient[4][4] =
+{
+   { 0,0,1      ,0 }, // reversed lighting direction
+   { 0.5,0.5,0.5,0 }, // directional color
+   { 0.5,0.5,0.5,0 }, // constant color
+   { 0.5,0.5,0.5,1.0f/1000.0f/1000.0f }, // fog data for simple_fog
+};
+
+static float stbvox_default_palette[64][4];
+
+static void stbvox_build_default_palette(void)
+{
+   int i;
+   for (i=0; i < 64; ++i) {
+      stbvox_default_palette[i][0] = stbvox_default_palette_compact[i][0] / 255.0f;
+      stbvox_default_palette[i][1] = stbvox_default_palette_compact[i][1] / 255.0f;
+      stbvox_default_palette[i][2] = stbvox_default_palette_compact[i][2] / 255.0f;
+      stbvox_default_palette[i][3] = 1.0f;
+   }
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Shaders
+//
+
+#if defined(STBVOX_ICONFIG_OPENGL_3_1_COMPATIBILITY)
+   #define STBVOX_SHADER_VERSION "#version 150 compatibility\n"
+#elif defined(STBVOX_ICONFIG_OPENGL_3_0)
+   #define STBVOX_SHADER_VERSION "#version 130\n"
+#elif defined(STBVOX_ICONFIG_GLSL)
+   #define STBVOX_SHADER_VERSION "#version 150\n"
+#else
+   #define STBVOX_SHADER_VERSION ""
+#endif
+
+static const char *stbvox_vertex_program =
+{
+      STBVOX_SHADER_VERSION
+
+   #ifdef STBVOX_ICONFIG_FACE_ATTRIBUTE  // NOT TAG_face_sampled
+      "in uvec4 attr_face;\n"
+   #else
+      "uniform usamplerBuffer facearray;\n"
+   #endif
+
+   #ifdef STBVOX_ICONFIG_FACE_ARRAY_2
+      "uniform usamplerBuffer facearray2;\n"
+   #endif
+
+      // vertex input data
+      "in uint attr_vertex;\n"
+
+      // per-buffer data
+      "uniform vec3 transform[3];\n"
+
+      // per-frame data
+      "uniform vec4 camera_pos;\n"  // 4th value is used for arbitrary hacking
+
+      // to simplify things, we avoid using more than 256 uniform vectors
+      // in fragment shader to avoid possible 1024 component limit, so
+      // we access this table in the fragment shader.
+      "uniform vec3 normal_table[32];\n"
+
+      #ifndef STBVOX_CONFIG_OPENGL_MODELVIEW
+         "uniform mat4x4 model_view;\n"
+      #endif
+
+      // fragment output data
+      "flat out uvec4  facedata;\n"
+      "     out  vec3  voxelspace_pos;\n"
+      "     out  vec3  vnormal;\n"
+      "     out float  texlerp;\n"
+      "     out float  amb_occ;\n"
+
+      // @TODO handle the HLSL way to do this
+      "void main()\n"
+      "{\n"
+      #ifdef STBVOX_ICONFIG_FACE_ATTRIBUTE
+         "   facedata = attr_face;\n"
+      #else
+         "   int faceID = gl_VertexID >> 2;\n"
+         "   facedata   = texelFetch(facearray, faceID);\n"
+      #endif
+
+      // extract data for vertex
+      "   vec3 offset;\n"
+      "   offset.x = float( (attr_vertex       ) & 127u );\n"             // a[0..6]
+      "   offset.y = float( (attr_vertex >>  7u) & 127u );\n"             // a[7..13]
+      "   offset.z = float( (attr_vertex >> 14u) & 511u );\n"             // a[14..22]
+      "   amb_occ  = float( (attr_vertex >> 23u) &  63u ) / 63.0;\n"      // a[23..28]
+      "   texlerp  = float( (attr_vertex >> 29u)        ) /  7.0;\n"      // a[29..31]
+
+      "   vnormal = normal_table[(facedata.w>>2u) & 31u];\n"
+      "   voxelspace_pos = offset * transform[0];\n"  // mesh-to-object scale
+      "   vec3 position  = voxelspace_pos + transform[1];\n"  // mesh-to-object translate
+
+      #ifdef STBVOX_DEBUG_TEST_NORMALS
+         "   if ((facedata.w & 28u) == 16u || (facedata.w & 28u) == 24u)\n"
+         "      position += vnormal.xyz * camera_pos.w;\n"
+      #endif
+
+      #ifndef STBVOX_CONFIG_OPENGL_MODELVIEW
+         "   gl_Position = model_view * vec4(position,1.0);\n"
+      #else
+         "   gl_Position = gl_ModelViewProjectionMatrix * vec4(position,1.0);\n"
+      #endif
+
+      "}\n"
+};
+
+
+static const char *stbvox_fragment_program =
+{
+      STBVOX_SHADER_VERSION
+
+      // rlerp is lerp but with t on the left, like god intended
+      #if defined(STBVOX_ICONFIG_GLSL)
+         "#define rlerp(t,x,y) mix(x,y,t)\n"
+      #elif defined(STBVOX_CONFIG_HLSL)
+         "#define rlerp(t,x,y) lerp(x,y,t)\n"
+      #else
+         #error "need definition of rlerp()"
+      #endif
+
+
+      // vertex-shader output data
+      "flat in uvec4  facedata;\n"
+      "     in  vec3  voxelspace_pos;\n"
+      "     in  vec3  vnormal;\n"
+      "     in float  texlerp;\n"
+      "     in float  amb_occ;\n"
+
+      // per-buffer data
+      "uniform vec3 transform[3];\n"
+
+      // per-frame data
+      "uniform vec4 camera_pos;\n"  // 4th value is used for arbitrary hacking
+
+      // probably constant data
+      "uniform vec4 ambient[4];\n"
+
+      #ifndef STBVOX_ICONFIG_UNTEXTURED
+         // generally constant data
+         "uniform sampler2DArray tex_array[2];\n"
+
+         #ifdef STBVOX_CONFIG_PREFER_TEXBUFFER
+            "uniform samplerBuffer color_table;\n"
+            "uniform samplerBuffer texscale;\n"
+            "uniform samplerBuffer texgen;\n"
+         #else
+            "uniform vec4 color_table[64];\n"
+            "uniform vec4 texscale[64];\n" // instead of 128, to avoid running out of uniforms
+            "uniform vec3 texgen[64];\n"
+         #endif
+      #endif
+
+      "out vec4  outcolor;\n"
+
+      #if defined(STBVOX_CONFIG_LIGHTING) || defined(STBVOX_CONFIG_LIGHTING_SIMPLE)
+      "vec3 compute_lighting(vec3 pos, vec3 norm, vec3 albedo, vec3 ambient);\n"
+      #endif
+      #if defined(STBVOX_CONFIG_FOG) || defined(STBVOX_CONFIG_FOG_SMOOTHSTEP)
+      "vec3 compute_fog(vec3 color, vec3 relative_pos, float fragment_alpha);\n"
+      #endif
+
+      "void main()\n"
+      "{\n"
+      "   vec3 albedo;\n"
+      "   float fragment_alpha;\n"
+
+      #ifndef STBVOX_ICONFIG_UNTEXTURED
+         // unpack the values
+         "   uint tex1_id = facedata.x;\n"
+         "   uint tex2_id = facedata.y;\n"
+         "   uint texprojid = facedata.w & 31u;\n"
+         "   uint color_id  = facedata.z;\n"
+
+         #ifndef STBVOX_CONFIG_PREFER_TEXBUFFER
+            // load from uniforms / texture buffers
+            "   vec3 texgen_s = texgen[texprojid];\n"
+            "   vec3 texgen_t = texgen[texprojid+32u];\n"
+            "   float tex1_scale = texscale[tex1_id & 63u].x;\n"
+            "   vec4 color = color_table[color_id & 63u];\n"
+            #ifndef STBVOX_CONFIG_DISABLE_TEX2
+            "   vec4 tex2_props = texscale[tex2_id & 63u];\n"
+            #endif
+         #else
+            "   vec3 texgen_s = texelFetch(texgen, int(texprojid)).xyz;\n"
+            "   vec3 texgen_t = texelFetch(texgen, int(texprojid+32u)).xyz;\n"
+            "   float tex1_scale = texelFetch(texscale, int(tex1_id & 127u)).x;\n"
+            "   vec4 color = texelFetch(color_table, int(color_id & 63u));\n"
+            #ifndef STBVOX_CONFIG_DISABLE_TEX2
+            "   vec4 tex2_props = texelFetch(texscale, int(tex1_id & 127u));\n"
+            #endif
+         #endif
+
+         #ifndef STBVOX_CONFIG_DISABLE_TEX2
+         "   float tex2_scale = tex2_props.y;\n"
+         "   bool texblend_mode = tex2_props.z != 0.0;\n"
+         #endif
+         "   vec2 texcoord;\n"
+         "   vec3 texturespace_pos = voxelspace_pos + transform[2].xyz;\n"
+         "   texcoord.s = dot(texturespace_pos, texgen_s);\n"
+         "   texcoord.t = dot(texturespace_pos, texgen_t);\n"
+
+         "   vec2  texcoord_1 = tex1_scale * texcoord;\n"
+         #ifndef STBVOX_CONFIG_DISABLE_TEX2
+         "   vec2  texcoord_2 = tex2_scale * texcoord;\n"
+         #endif
+
+         #ifdef STBVOX_CONFIG_TEX1_EDGE_CLAMP
+         "   texcoord_1 = texcoord_1 - floor(texcoord_1);\n"
+         "   vec4 tex1 = textureGrad(tex_array[0], vec3(texcoord_1, float(tex1_id)), dFdx(tex1_scale*texcoord), dFdy(tex1_scale*texcoord));\n"
+         #else
+         "   vec4 tex1 = texture(tex_array[0], vec3(texcoord_1, float(tex1_id)));\n"
+         #endif
+
+         #ifndef STBVOX_CONFIG_DISABLE_TEX2
+         #ifdef STBVOX_CONFIG_TEX2_EDGE_CLAMP
+         "   texcoord_2 = texcoord_2 - floor(texcoord_2);\n"
+         "   vec4 tex2 = textureGrad(tex_array[0], vec3(texcoord_2, float(tex2_id)), dFdx(tex2_scale*texcoord), dFdy(tex2_scale*texcoord));\n"
+         #else
+         "   vec4 tex2 = texture(tex_array[1], vec3(texcoord_2, float(tex2_id)));\n"
+         #endif
+         #endif
+
+         "   bool emissive = (color.a > 1.0);\n"
+         "   color.a = min(color.a, 1.0);\n"
+
+         // recolor textures
+         "   if ((color_id &  64u) != 0u) tex1.rgba *= color.rgba;\n"
+         "   fragment_alpha = tex1.a;\n"
+         #ifndef STBVOX_CONFIG_DISABLE_TEX2
+            "   if ((color_id & 128u) != 0u) tex2.rgba *= color.rgba;\n"
+
+            #ifdef STBVOX_CONFIG_PREMULTIPLIED_ALPHA
+            "   tex2.rgba *= texlerp;\n"
+            #else
+            "   tex2.a *= texlerp;\n"
+            #endif
+
+            "   if (texblend_mode)\n"
+            "      albedo = tex1.xyz * rlerp(tex2.a, vec3(1.0,1.0,1.0), 2.0*tex2.xyz);\n"
+            "   else {\n"
+            #ifdef STBVOX_CONFIG_PREMULTIPLIED_ALPHA
+            "      albedo = (1.0-tex2.a)*tex1.xyz + tex2.xyz;\n"
+            #else
+            "      albedo = rlerp(tex2.a, tex1.xyz, tex2.xyz);\n"
+            #endif
+            "      fragment_alpha = tex1.a*(1-tex2.a)+tex2.a;\n"
+            "   }\n"
+         #else
+            "      albedo = tex1.xyz;\n"
+         #endif
+
+      #else // UNTEXTURED
+         "   vec4 color;"
+         "   color.xyz = vec3(facedata.xyz) / 255.0;\n"
+         "   bool emissive = false;\n"
+         "   albedo = color.xyz;\n"
+         "   fragment_alpha = 1.0;\n"
+      #endif
+
+      #ifdef STBVOX_ICONFIG_VARYING_VERTEX_NORMALS
+         // currently, there are no modes that trigger this path; idea is that there
+         // could be a couple of bits per vertex to perturb the normal to e.g. get curved look
+         "   vec3 normal = normalize(vnormal);\n"
+      #else
+         "   vec3 normal = vnormal;\n"
+      #endif
+
+      "   vec3 ambient_color = dot(normal, ambient[0].xyz) * ambient[1].xyz + ambient[2].xyz;\n"
+
+      "   ambient_color = clamp(ambient_color, 0.0, 1.0);"
+      "   ambient_color *= amb_occ;\n"
+
+      "   vec3 lit_color;\n"
+      "   if (!emissive)\n"
+      #if defined(STBVOX_ICONFIG_LIGHTING) || defined(STBVOX_CONFIG_LIGHTING_SIMPLE)
+         "      lit_color = compute_lighting(voxelspace_pos + transform[1], normal, albedo, ambient_color);\n"
+      #else
+         "      lit_color = albedo * ambient_color ;\n"
+      #endif
+      "   else\n"
+      "      lit_color = albedo;\n"
+
+      #if defined(STBVOX_ICONFIG_FOG) || defined(STBVOX_CONFIG_FOG_SMOOTHSTEP)
+         "   vec3 dist = voxelspace_pos + (transform[1] - camera_pos.xyz);\n"
+         "   lit_color = compute_fog(lit_color, dist, fragment_alpha);\n"
+      #endif
+
+      #ifdef STBVOX_CONFIG_UNPREMULTIPLY
+      "   vec4 final_color = vec4(lit_color/fragment_alpha, fragment_alpha);\n"
+      #else
+      "   vec4 final_color = vec4(lit_color, fragment_alpha);\n"
+      #endif
+      "   outcolor = final_color;\n"
+      "}\n"
+
+   #ifdef STBVOX_CONFIG_LIGHTING_SIMPLE
+      "\n"
+      "uniform vec3 light_source[2];\n"
+      "vec3 compute_lighting(vec3 pos, vec3 norm, vec3 albedo, vec3 ambient)\n"
+      "{\n"
+      "   vec3 light_dir = light_source[0] - pos;\n"
+      "   float lambert = dot(light_dir, norm) / dot(light_dir, light_dir);\n"
+      "   vec3 diffuse = clamp(light_source[1] * clamp(lambert, 0.0, 1.0), 0.0, 1.0);\n"
+      "   return (diffuse + ambient) * albedo;\n"
+      "}\n"
+   #endif
+
+   #ifdef STBVOX_CONFIG_FOG_SMOOTHSTEP
+      "\n"
+      "vec3 compute_fog(vec3 color, vec3 relative_pos, float fragment_alpha)\n"
+      "{\n"
+      "   float f = dot(relative_pos,relative_pos)*ambient[3].w;\n"
+      //"   f = rlerp(f, -2,1);\n"
+      "   f = clamp(f, 0.0, 1.0);\n"
+      "   f = 3.0*f*f - 2.0*f*f*f;\n" // smoothstep
+      //"   f = f*f;\n"  // fade in more smoothly
+      #ifdef STBVOX_CONFIG_PREMULTIPLIED_ALPHA
+      "   return rlerp(f, color.xyz, ambient[3].xyz*fragment_alpha);\n"
+      #else
+      "   return rlerp(f, color.xyz, ambient[3].xyz);\n"
+      #endif
+      "}\n"
+   #endif
+};
+
+
+// still requires full alpha lookups, including tex2 if texblend is enabled
+static const char *stbvox_fragment_program_alpha_only =
+{
+   STBVOX_SHADER_VERSION
+
+   // vertex-shader output data
+   "flat in uvec4  facedata;\n"
+   "     in  vec3  voxelspace_pos;\n"
+   "     in float  texlerp;\n"
+
+   // per-buffer data
+   "uniform vec3 transform[3];\n"
+
+   #ifndef STBVOX_ICONFIG_UNTEXTURED
+      // generally constant data
+      "uniform sampler2DArray tex_array[2];\n"
+
+      #ifdef STBVOX_CONFIG_PREFER_TEXBUFFER
+         "uniform samplerBuffer texscale;\n"
+         "uniform samplerBuffer texgen;\n"
+      #else
+         "uniform vec4 texscale[64];\n" // instead of 128, to avoid running out of uniforms
+         "uniform vec3 texgen[64];\n"
+      #endif
+   #endif
+
+   "out vec4  outcolor;\n"
+
+   "void main()\n"
+   "{\n"
+   "   vec3 albedo;\n"
+   "   float fragment_alpha;\n"
+
+   #ifndef STBVOX_ICONFIG_UNTEXTURED
+      // unpack the values
+      "   uint tex1_id = facedata.x;\n"
+      "   uint tex2_id = facedata.y;\n"
+      "   uint texprojid = facedata.w & 31u;\n"
+      "   uint color_id  = facedata.z;\n"
+
+      #ifndef STBVOX_CONFIG_PREFER_TEXBUFFER
+         // load from uniforms / texture buffers
+         "   vec3 texgen_s = texgen[texprojid];\n"
+         "   vec3 texgen_t = texgen[texprojid+32u];\n"
+         "   float tex1_scale = texscale[tex1_id & 63u].x;\n"
+         "   vec4 color = color_table[color_id & 63u];\n"
+         "   vec4 tex2_props = texscale[tex2_id & 63u];\n"
+      #else
+         "   vec3 texgen_s = texelFetch(texgen, int(texprojid)).xyz;\n"
+         "   vec3 texgen_t = texelFetch(texgen, int(texprojid+32u)).xyz;\n"
+         "   float tex1_scale = texelFetch(texscale, int(tex1_id & 127u)).x;\n"
+         "   vec4 color = texelFetch(color_table, int(color_id & 63u));\n"
+         "   vec4 tex2_props = texelFetch(texscale, int(tex2_id & 127u));\n"
+      #endif
+
+      #ifndef STBVOX_CONFIG_DISABLE_TEX2
+      "   float tex2_scale = tex2_props.y;\n"
+      "   bool texblend_mode = tex2_props.z &((facedata.w & 128u) != 0u);\n"
+      #endif
+
+      "   color.a = min(color.a, 1.0);\n"
+
+      "   vec2 texcoord;\n"
+      "   vec3 texturespace_pos = voxelspace_pos + transform[2].xyz;\n"
+      "   texcoord.s = dot(texturespace_pos, texgen_s);\n"
+      "   texcoord.t = dot(texturespace_pos, texgen_t);\n"
+
+      "   vec2  texcoord_1 = tex1_scale * texcoord;\n"
+      "   vec2  texcoord_2 = tex2_scale * texcoord;\n"
+
+      #ifdef STBVOX_CONFIG_TEX1_EDGE_CLAMP
+      "   texcoord_1 = texcoord_1 - floor(texcoord_1);\n"
+      "   vec4 tex1 = textureGrad(tex_array[0], vec3(texcoord_1, float(tex1_id)), dFdx(tex1_scale*texcoord), dFdy(tex1_scale*texcoord));\n"
+      #else
+      "   vec4 tex1 = texture(tex_array[0], vec3(texcoord_1, float(tex1_id)));\n"
+      #endif
+
+      "   if ((color_id &  64u) != 0u) tex1.a *= color.a;\n"
+      "   fragment_alpha = tex1.a;\n"
+
+      #ifndef STBVOX_CONFIG_DISABLE_TEX2
+      "   if (!texblend_mode) {\n"
+         #ifdef STBVOX_CONFIG_TEX2_EDGE_CLAMP
+         "      texcoord_2 = texcoord_2 - floor(texcoord_2);\n"
+         "      vec4 tex2 = textureGrad(tex_array[0], vec3(texcoord_2, float(tex2_id)), dFdx(tex2_scale*texcoord), dFdy(tex2_scale*texcoord));\n"
+         #else
+         "      vec4 tex2 = texture(tex_array[1], vec3(texcoord_2, float(tex2_id)));\n"
+         #endif
+
+         "      tex2.a *= texlerp;\n"
+         "      if ((color_id & 128u) != 0u) tex2.rgba *= color.a;\n"
+         "      fragment_alpha = tex1.a*(1-tex2.a)+tex2.a;\n"
+         "}\n"
+      "\n"
+      #endif
+
+   #else // UNTEXTURED
+      "   fragment_alpha = 1.0;\n"
+   #endif
+
+   "   outcolor = vec4(0.0, 0.0, 0.0, fragment_alpha);\n"
+   "}\n"
+};
+
+
+STBVXDEC char *stbvox_get_vertex_shader(void)
+{
+   return (char *) stbvox_vertex_program;
+}
+
+STBVXDEC char *stbvox_get_fragment_shader(void)
+{
+   return (char *) stbvox_fragment_program;
+}
+
+STBVXDEC char *stbvox_get_fragment_shader_alpha_only(void)
+{
+   return (char *) stbvox_fragment_program_alpha_only;
+}
+
+static float stbvox_dummy_transform[3][3];
+
+#ifdef STBVOX_CONFIG_PREFER_TEXBUFFER
+#define STBVOX_TEXBUF 1
+#else
+#define STBVOX_TEXBUF 0
+#endif
+
+static stbvox_uniform_info stbvox_uniforms[] =
+{
+   { STBVOX_UNIFORM_TYPE_sampler  ,  4,   1, (char*) "facearray"    , 0                           },
+   { STBVOX_UNIFORM_TYPE_vec3     , 12,   3, (char*) "transform"    , stbvox_dummy_transform[0]   },
+   { STBVOX_UNIFORM_TYPE_sampler  ,  4,   2, (char*) "tex_array"    , 0                           },
+   { STBVOX_UNIFORM_TYPE_vec4     , 16, 128, (char*) "texscale"     , stbvox_default_texscale[0] , STBVOX_TEXBUF },
+   { STBVOX_UNIFORM_TYPE_vec4     , 16,  64, (char*) "color_table"  , stbvox_default_palette[0]  , STBVOX_TEXBUF },
+   { STBVOX_UNIFORM_TYPE_vec3     , 12,  32, (char*) "normal_table" , stbvox_default_normals[0]   },
+   { STBVOX_UNIFORM_TYPE_vec3     , 12,  64, (char*) "texgen"       , stbvox_default_texgen[0][0], STBVOX_TEXBUF },
+   { STBVOX_UNIFORM_TYPE_vec4     , 16,   4, (char*) "ambient"      , stbvox_default_ambient[0]   },
+   { STBVOX_UNIFORM_TYPE_vec4     , 16,   1, (char*) "camera_pos"   , stbvox_dummy_transform[0]   },
+};
+
+STBVXDEC int stbvox_get_uniform_info(stbvox_uniform_info *info, int uniform)
+{
+   if (uniform < 0 || uniform >= STBVOX_UNIFORM_count)
+      return 0;
+
+   *info = stbvox_uniforms[uniform];
+   return 1;
+}
+
+#define STBVOX_GET_GEO(geom_data)  ((geom_data) & 15)
+
+typedef struct
+{
+   unsigned char block:2;
+   unsigned char overlay:2;
+   unsigned char facerot:2;
+   unsigned char ecolor:2;
+} stbvox_rotate;
+
+typedef struct
+{
+   unsigned char x,y,z;
+} stbvox_pos;
+
+static unsigned char stbvox_rotate_face[6][4] =
+{
+   { 0,1,2,3 },
+   { 1,2,3,0 },
+   { 2,3,0,1 },
+   { 3,0,1,2 },
+   { 4,4,4,4 },
+   { 5,5,5,5 },
+};
+
+#define STBVOX_ROTATE(x,r)   stbvox_rotate_face[x][r] // (((x)+(r))&3)
+
+stbvox_mesh_face stbvox_compute_mesh_face_value(stbvox_mesh_maker *mm, stbvox_rotate rot, int face, int v_off, int normal)
+{
+   stbvox_mesh_face face_data = { 0 };
+   stbvox_block_type bt = mm->input.blocktype[v_off];
+   unsigned char bt_face = STBVOX_ROTATE(face, rot.block);
+   int facerot = rot.facerot;
+
+   #ifdef STBVOX_ICONFIG_UNTEXTURED
+   if (mm->input.rgb) {
+      face_data.tex1  = mm->input.rgb[v_off].r;
+      face_data.tex2  = mm->input.rgb[v_off].g;
+      face_data.color = mm->input.rgb[v_off].b;
+      face_data.face_info = (normal<<2);
+      return face_data;
+   }
+   #else
+   unsigned char color_face;
+
+   if (mm->input.color)
+      face_data.color = mm->input.color[v_off];
+
+   if (mm->input.block_tex1)
+      face_data.tex1 = mm->input.block_tex1[bt];
+   else if (mm->input.block_tex1_face)
+      face_data.tex1 = mm->input.block_tex1_face[bt][bt_face];
+   else
+      face_data.tex1 = bt;
+
+   if (mm->input.block_tex2)
+      face_data.tex2 = mm->input.block_tex2[bt];
+   else if (mm->input.block_tex2_face)
+      face_data.tex2 = mm->input.block_tex2_face[bt][bt_face];
+
+   if (mm->input.block_color) {
+      unsigned char mcol = mm->input.block_color[bt];
+      if (mcol)
+         face_data.color = mcol;
+   } else if (mm->input.block_color_face) {
+      unsigned char mcol = mm->input.block_color_face[bt][bt_face];
+      if (mcol)
+         face_data.color = mcol;
+   }
+
+   if (face <= STBVOX_FACE_south) {
+      if (mm->input.side_texrot)
+         facerot = mm->input.side_texrot[v_off] >> (2 * face);
+      else if (mm->input.block_side_texrot)
+         facerot = mm->input.block_side_texrot[v_off] >> (2 * bt_face);
+   }
+
+   if (mm->input.overlay) {
+      int over_face = STBVOX_ROTATE(face, rot.overlay);
+      unsigned char over = mm->input.overlay[v_off];
+      if (over) {
+         if (mm->input.overlay_tex1) {
+            unsigned char rep1 = mm->input.overlay_tex1[over][over_face];
+            if (rep1)
+               face_data.tex1 = rep1;
+         }
+         if (mm->input.overlay_tex2) {
+            unsigned char rep2 = mm->input.overlay_tex2[over][over_face];
+            if (rep2)
+               face_data.tex2 = rep2;
+         }
+         if (mm->input.overlay_color) {
+            unsigned char rep3 = mm->input.overlay_color[over][over_face];
+            if (rep3)
+               face_data.color = rep3;
+         }
+
+         if (mm->input.overlay_side_texrot && face <= STBVOX_FACE_south)
+            facerot = mm->input.overlay_side_texrot[over] >> (2*over_face);
+      }
+   }
+
+   if (mm->input.tex2_for_tex1)
+      face_data.tex2 = mm->input.tex2_for_tex1[face_data.tex1];
+   if (mm->input.tex2)
+      face_data.tex2 = mm->input.tex2[v_off];
+   if (mm->input.tex2_replace) {
+      if (mm->input.tex2_facemask[v_off] & (1 << face))
+         face_data.tex2 = mm->input.tex2_replace[v_off];
+   }
+
+   color_face = STBVOX_ROTATE(face, rot.ecolor);
+   if (mm->input.extended_color) {
+      unsigned char ec = mm->input.extended_color[v_off];
+      if (mm->input.ecolor_facemask[ec] & (1 << color_face))
+         face_data.color = mm->input.ecolor_color[ec];
+   }
+
+   if (mm->input.color2) {
+      if (mm->input.color2_facemask[v_off] & (1 << color_face))
+         face_data.color = mm->input.color2[v_off];
+      if (mm->input.color3 && (mm->input.color3_facemask[v_off] & (1 << color_face)))
+         face_data.color = mm->input.color3[v_off];
+   }
+   #endif
+
+   face_data.face_info = (normal<<2) + facerot;
+   return face_data;
+}
+
+// these are the types of faces each block can have
+enum
+{
+   STBVOX_FT_none    ,
+   STBVOX_FT_upper   ,
+   STBVOX_FT_lower   ,
+   STBVOX_FT_solid   ,
+   STBVOX_FT_diag_012,
+   STBVOX_FT_diag_023,
+   STBVOX_FT_diag_013,
+   STBVOX_FT_diag_123,
+   STBVOX_FT_force   , // can't be covered up, used for internal faces, also hides nothing
+   STBVOX_FT_partial , // only covered by solid, never covers anything else
+
+   STBVOX_FT_count
+};
+
+static unsigned char stbvox_face_lerp[6] = { 0,2,0,2,4,4 };
+static unsigned char stbvox_vert3_lerp[5] = { 0,3,6,9,12 };
+static unsigned char stbvox_vert_lerp_for_face_lerp[4] = { 0, 4, 7, 7 };
+static unsigned char stbvox_face3_lerp[6] = { 0,3,6,9,12,14 };
+static unsigned char stbvox_vert_lerp_for_simple[4] = { 0,2,5,7 };
+static unsigned char stbvox_face3_updown[8] = { 0,2,5,7,0,2,5,7 }; // ignore top bit
+
+// vertex offsets for face vertices
+static unsigned char stbvox_vertex_vector[6][4][3] =
+{
+   { { 1,0,1 }, { 1,1,1 }, { 1,1,0 }, { 1,0,0 } }, // east
+   { { 1,1,1 }, { 0,1,1 }, { 0,1,0 }, { 1,1,0 } }, // north
+   { { 0,1,1 }, { 0,0,1 }, { 0,0,0 }, { 0,1,0 } }, // west
+   { { 0,0,1 }, { 1,0,1 }, { 1,0,0 }, { 0,0,0 } }, // south
+   { { 0,1,1 }, { 1,1,1 }, { 1,0,1 }, { 0,0,1 } }, // up
+   { { 0,0,0 }, { 1,0,0 }, { 1,1,0 }, { 0,1,0 } }, // down
+};
+
+// stbvox_vertex_vector, but read coordinates as binary numbers, zyx
+static unsigned char stbvox_vertex_selector[6][4] =
+{
+   { 5,7,3,1 },
+   { 7,6,2,3 },
+   { 6,4,0,2 },
+   { 4,5,1,0 },
+   { 6,7,5,4 },
+   { 0,1,3,2 },
+};
+
+static stbvox_mesh_vertex stbvox_vmesh_delta_normal[6][4] =
+{
+   {  stbvox_vertex_encode(1,0,1,0,0) ,
+      stbvox_vertex_encode(1,1,1,0,0) ,
+      stbvox_vertex_encode(1,1,0,0,0) ,
+      stbvox_vertex_encode(1,0,0,0,0)  },
+   {  stbvox_vertex_encode(1,1,1,0,0) ,
+      stbvox_vertex_encode(0,1,1,0,0) ,
+      stbvox_vertex_encode(0,1,0,0,0) ,
+      stbvox_vertex_encode(1,1,0,0,0)  },
+   {  stbvox_vertex_encode(0,1,1,0,0) ,
+      stbvox_vertex_encode(0,0,1,0,0) ,
+      stbvox_vertex_encode(0,0,0,0,0) ,
+      stbvox_vertex_encode(0,1,0,0,0)  },
+   {  stbvox_vertex_encode(0,0,1,0,0) ,
+      stbvox_vertex_encode(1,0,1,0,0) ,
+      stbvox_vertex_encode(1,0,0,0,0) ,
+      stbvox_vertex_encode(0,0,0,0,0)  },
+   {  stbvox_vertex_encode(0,1,1,0,0) ,
+      stbvox_vertex_encode(1,1,1,0,0) ,
+      stbvox_vertex_encode(1,0,1,0,0) ,
+      stbvox_vertex_encode(0,0,1,0,0)  },
+   {  stbvox_vertex_encode(0,0,0,0,0) ,
+      stbvox_vertex_encode(1,0,0,0,0) ,
+      stbvox_vertex_encode(1,1,0,0,0) ,
+      stbvox_vertex_encode(0,1,0,0,0)  }
+};
+
+static stbvox_mesh_vertex stbvox_vmesh_pre_vheight[6][4] =
+{
+   {  stbvox_vertex_encode(1,0,0,0,0) ,
+      stbvox_vertex_encode(1,1,0,0,0) ,
+      stbvox_vertex_encode(1,1,0,0,0) ,
+      stbvox_vertex_encode(1,0,0,0,0)  },
+   {  stbvox_vertex_encode(1,1,0,0,0) ,
+      stbvox_vertex_encode(0,1,0,0,0) ,
+      stbvox_vertex_encode(0,1,0,0,0) ,
+      stbvox_vertex_encode(1,1,0,0,0)  },
+   {  stbvox_vertex_encode(0,1,0,0,0) ,
+      stbvox_vertex_encode(0,0,0,0,0) ,
+      stbvox_vertex_encode(0,0,0,0,0) ,
+      stbvox_vertex_encode(0,1,0,0,0)  },
+   {  stbvox_vertex_encode(0,0,0,0,0) ,
+      stbvox_vertex_encode(1,0,0,0,0) ,
+      stbvox_vertex_encode(1,0,0,0,0) ,
+      stbvox_vertex_encode(0,0,0,0,0)  },
+   {  stbvox_vertex_encode(0,1,0,0,0) ,
+      stbvox_vertex_encode(1,1,0,0,0) ,
+      stbvox_vertex_encode(1,0,0,0,0) ,
+      stbvox_vertex_encode(0,0,0,0,0)  },
+   {  stbvox_vertex_encode(0,0,0,0,0) ,
+      stbvox_vertex_encode(1,0,0,0,0) ,
+      stbvox_vertex_encode(1,1,0,0,0) ,
+      stbvox_vertex_encode(0,1,0,0,0)  }
+};
+
+static stbvox_mesh_vertex stbvox_vmesh_delta_half_z[6][4] =
+{
+   { stbvox_vertex_encode(1,0,2,0,0) ,
+     stbvox_vertex_encode(1,1,2,0,0) ,
+     stbvox_vertex_encode(1,1,0,0,0) ,
+     stbvox_vertex_encode(1,0,0,0,0)  },
+   { stbvox_vertex_encode(1,1,2,0,0) ,
+     stbvox_vertex_encode(0,1,2,0,0) ,
+     stbvox_vertex_encode(0,1,0,0,0) ,
+     stbvox_vertex_encode(1,1,0,0,0)  },
+   { stbvox_vertex_encode(0,1,2,0,0) ,
+     stbvox_vertex_encode(0,0,2,0,0) ,
+     stbvox_vertex_encode(0,0,0,0,0) ,
+     stbvox_vertex_encode(0,1,0,0,0)  },
+   { stbvox_vertex_encode(0,0,2,0,0) ,
+     stbvox_vertex_encode(1,0,2,0,0) ,
+     stbvox_vertex_encode(1,0,0,0,0) ,
+     stbvox_vertex_encode(0,0,0,0,0)  },
+   { stbvox_vertex_encode(0,1,2,0,0) ,
+     stbvox_vertex_encode(1,1,2,0,0) ,
+     stbvox_vertex_encode(1,0,2,0,0) ,
+     stbvox_vertex_encode(0,0,2,0,0)  },
+   { stbvox_vertex_encode(0,0,0,0,0) ,
+     stbvox_vertex_encode(1,0,0,0,0) ,
+     stbvox_vertex_encode(1,1,0,0,0) ,
+     stbvox_vertex_encode(0,1,0,0,0)  }
+};
+
+static stbvox_mesh_vertex stbvox_vmesh_crossed_pair[6][4] =
+{
+   { stbvox_vertex_encode(1,0,2,0,0) ,
+     stbvox_vertex_encode(0,1,2,0,0) ,
+     stbvox_vertex_encode(0,1,0,0,0) ,
+     stbvox_vertex_encode(1,0,0,0,0)  },
+   { stbvox_vertex_encode(1,1,2,0,0) ,
+     stbvox_vertex_encode(0,0,2,0,0) ,
+     stbvox_vertex_encode(0,0,0,0,0) ,
+     stbvox_vertex_encode(1,1,0,0,0)  },
+   { stbvox_vertex_encode(0,1,2,0,0) ,
+     stbvox_vertex_encode(1,0,2,0,0) ,
+     stbvox_vertex_encode(1,0,0,0,0) ,
+     stbvox_vertex_encode(0,1,0,0,0)  },
+   { stbvox_vertex_encode(0,0,2,0,0) ,
+     stbvox_vertex_encode(1,1,2,0,0) ,
+     stbvox_vertex_encode(1,1,0,0,0) ,
+     stbvox_vertex_encode(0,0,0,0,0)  },
+   // not used, so we leave it non-degenerate to make sure it doesn't get gen'd accidentally
+   { stbvox_vertex_encode(0,1,2,0,0) ,
+     stbvox_vertex_encode(1,1,2,0,0) ,
+     stbvox_vertex_encode(1,0,2,0,0) ,
+     stbvox_vertex_encode(0,0,2,0,0)  },
+   { stbvox_vertex_encode(0,0,0,0,0) ,
+     stbvox_vertex_encode(1,0,0,0,0) ,
+     stbvox_vertex_encode(1,1,0,0,0) ,
+     stbvox_vertex_encode(0,1,0,0,0)  }
+};
+
+#define STBVOX_MAX_GEOM     16
+#define STBVOX_NUM_ROTATION  4
+
+// this is used to determine if a face is ever generated at all
+static unsigned char stbvox_hasface[STBVOX_MAX_GEOM][STBVOX_NUM_ROTATION] =
+{
+   { 0,0,0,0 }, // empty
+   { 0,0,0,0 }, // knockout
+   { 63,63,63,63 }, // solid
+   { 63,63,63,63 }, // transp
+   { 63,63,63,63 }, // slab
+   { 63,63,63,63 }, // slab
+   { 1|2|4|48, 8|1|2|48, 4|8|1|48, 2|4|8|48, }, // floor slopes
+   { 1|2|4|48, 8|1|2|48, 4|8|1|48, 2|4|8|48, }, // ceil slopes
+   { 47,47,47,47 }, // wall-projected diagonal with down face
+   { 31,31,31,31 }, // wall-projected diagonal with up face
+   { 63,63,63,63 }, // crossed-pair has special handling, but avoid early-out
+   { 63,63,63,63 }, // force
+   { 63,63,63,63 }, // vheight
+   { 63,63,63,63 }, // vheight
+   { 63,63,63,63 }, // vheight
+   { 63,63,63,63 }, // vheight
+};
+
+// this determines which face type above is visible on each side of the geometry
+static unsigned char stbvox_facetype[STBVOX_GEOM_count][6] =
+{
+   { 0, },  // STBVOX_GEOM_empty
+   { STBVOX_FT_solid, STBVOX_FT_solid, STBVOX_FT_solid, STBVOX_FT_solid, STBVOX_FT_solid, STBVOX_FT_solid }, // knockout
+   { STBVOX_FT_solid, STBVOX_FT_solid, STBVOX_FT_solid, STBVOX_FT_solid, STBVOX_FT_solid, STBVOX_FT_solid }, // solid
+   { STBVOX_FT_force, STBVOX_FT_force, STBVOX_FT_force, STBVOX_FT_force, STBVOX_FT_force, STBVOX_FT_force }, // transp
+
+   { STBVOX_FT_upper, STBVOX_FT_upper, STBVOX_FT_upper, STBVOX_FT_upper, STBVOX_FT_solid, STBVOX_FT_force },
+   { STBVOX_FT_lower, STBVOX_FT_lower, STBVOX_FT_lower, STBVOX_FT_lower, STBVOX_FT_force, STBVOX_FT_solid },
+   { STBVOX_FT_diag_123, STBVOX_FT_solid, STBVOX_FT_diag_023, STBVOX_FT_none, STBVOX_FT_force, STBVOX_FT_solid },
+   { STBVOX_FT_diag_012, STBVOX_FT_solid, STBVOX_FT_diag_013, STBVOX_FT_none, STBVOX_FT_solid, STBVOX_FT_force },
+
+   { STBVOX_FT_diag_123, STBVOX_FT_solid, STBVOX_FT_diag_023, STBVOX_FT_force, STBVOX_FT_none, STBVOX_FT_solid },
+   { STBVOX_FT_diag_012, STBVOX_FT_solid, STBVOX_FT_diag_013, STBVOX_FT_force, STBVOX_FT_solid, STBVOX_FT_none },
+   { STBVOX_FT_force, STBVOX_FT_force, STBVOX_FT_force, STBVOX_FT_force, 0,0 }, // crossed pair
+   { STBVOX_FT_force, STBVOX_FT_force, STBVOX_FT_force, STBVOX_FT_force, STBVOX_FT_force, STBVOX_FT_force }, // GEOM_force
+
+   { STBVOX_FT_partial,STBVOX_FT_partial,STBVOX_FT_partial,STBVOX_FT_partial, STBVOX_FT_force, STBVOX_FT_solid }, // floor vheight, all neighbors forced
+   { STBVOX_FT_partial,STBVOX_FT_partial,STBVOX_FT_partial,STBVOX_FT_partial, STBVOX_FT_force, STBVOX_FT_solid }, // floor vheight, all neighbors forced
+   { STBVOX_FT_partial,STBVOX_FT_partial,STBVOX_FT_partial,STBVOX_FT_partial, STBVOX_FT_solid, STBVOX_FT_force }, // ceil vheight, all neighbors forced
+   { STBVOX_FT_partial,STBVOX_FT_partial,STBVOX_FT_partial,STBVOX_FT_partial, STBVOX_FT_solid, STBVOX_FT_force }, // ceil vheight, all neighbors forced
+};
+
+// This table indicates what normal to use for the "up" face of a sloped geom
+// @TODO this could be done with math given the current arrangement of the enum, but let's not require it
+static unsigned char stbvox_floor_slope_for_rot[4] =
+{
+   STBVF_su,
+   STBVF_wu, // @TODO: why is this reversed from what it should be? this is a north-is-up face, so slope should be south&up
+   STBVF_nu,
+   STBVF_eu,
+};
+
+static unsigned char stbvox_ceil_slope_for_rot[4] =
+{
+   STBVF_sd,
+   STBVF_ed,
+   STBVF_nd,
+   STBVF_wd,
+};
+
+// this table indicates whether, for each pair of types above, a face is visible.
+// each value indicates whether a given type is visible for all neighbor types
+static unsigned short stbvox_face_visible[STBVOX_FT_count] =
+{
+   // we encode the table by listing which cases cause *obscuration*, and bitwise inverting that
+   // table is pre-shifted by 5 to save a shift when it's accessed
+   (unsigned short) ((~0x07ffu                                          )<<5),  // none is completely obscured by everything
+   (unsigned short) ((~((1u<<STBVOX_FT_solid) | (1<<STBVOX_FT_upper)   ))<<5),  // upper
+   (unsigned short) ((~((1u<<STBVOX_FT_solid) | (1<<STBVOX_FT_lower)   ))<<5),  // lower
+   (unsigned short) ((~((1u<<STBVOX_FT_solid)                          ))<<5),  // solid is only completely obscured only by solid
+   (unsigned short) ((~((1u<<STBVOX_FT_solid) | (1<<STBVOX_FT_diag_013)))<<5),  // diag012 matches diag013
+   (unsigned short) ((~((1u<<STBVOX_FT_solid) | (1<<STBVOX_FT_diag_123)))<<5),  // diag023 matches diag123
+   (unsigned short) ((~((1u<<STBVOX_FT_solid) | (1<<STBVOX_FT_diag_012)))<<5),  // diag013 matches diag012
+   (unsigned short) ((~((1u<<STBVOX_FT_solid) | (1<<STBVOX_FT_diag_023)))<<5),  // diag123 matches diag023
+   (unsigned short) ((~0u                                               )<<5),  // force is always rendered regardless, always forces neighbor
+   (unsigned short) ((~((1u<<STBVOX_FT_solid)                          ))<<5),  // partial is only completely obscured only by solid
+};
+
+// the vertex heights of the block types, in binary vertex order (zyx):
+// lower: SW, SE, NW, NE; upper: SW, SE, NW, NE
+static stbvox_mesh_vertex stbvox_geometry_vheight[8][8] =
+{
+   #define STBVOX_HEIGHTS(a,b,c,d,e,f,g,h) \
+     { stbvox_vertex_encode(0,0,a,0,0),  \
+       stbvox_vertex_encode(0,0,b,0,0),  \
+       stbvox_vertex_encode(0,0,c,0,0),  \
+       stbvox_vertex_encode(0,0,d,0,0),  \
+       stbvox_vertex_encode(0,0,e,0,0),  \
+       stbvox_vertex_encode(0,0,f,0,0),  \
+       stbvox_vertex_encode(0,0,g,0,0),  \
+       stbvox_vertex_encode(0,0,h,0,0) }
+
+   STBVOX_HEIGHTS(0,0,0,0, 2,2,2,2),
+   STBVOX_HEIGHTS(0,0,0,0, 2,2,2,2),
+   STBVOX_HEIGHTS(0,0,0,0, 2,2,2,2),
+   STBVOX_HEIGHTS(0,0,0,0, 2,2,2,2),
+   STBVOX_HEIGHTS(1,1,1,1, 2,2,2,2),
+   STBVOX_HEIGHTS(0,0,0,0, 1,1,1,1),
+   STBVOX_HEIGHTS(0,0,0,0, 0,0,2,2),
+   STBVOX_HEIGHTS(2,2,0,0, 2,2,2,2),
+};
+
+// rotate vertices defined as [z][y][x] coords
+static unsigned char stbvox_rotate_vertex[8][4] =
+{
+   { 0,1,3,2 }, // zyx=000
+   { 1,3,2,0 }, // zyx=001
+   { 2,0,1,3 }, // zyx=010
+   { 3,2,0,1 }, // zyx=011
+   { 4,5,7,6 }, // zyx=100
+   { 5,7,6,4 }, // zyx=101
+   { 6,4,5,7 }, // zyx=110
+   { 7,6,4,5 }, // zyx=111
+};
+
+#ifdef STBVOX_CONFIG_OPTIMIZED_VHEIGHT
+// optimized vheight generates a single normal over the entire face, even if it's not planar
+static unsigned char stbvox_optimized_face_up_normal[4][4][4][4] =
+{
+   {
+      {
+         { STBVF_u   , STBVF_ne_u, STBVF_ne_u, STBVF_ne_u, },
+         { STBVF_nw_u, STBVF_nu  , STBVF_nu  , STBVF_ne_u, },
+         { STBVF_nw_u, STBVF_nu  , STBVF_nu  , STBVF_nu  , },
+         { STBVF_nw_u, STBVF_nw_u, STBVF_nu  , STBVF_nu  , },
+      },{
+         { STBVF_su  , STBVF_eu  , STBVF_eu  , STBVF_ne_u, },
+         { STBVF_u   , STBVF_ne_u, STBVF_ne_u, STBVF_ne_u, },
+         { STBVF_nw_u, STBVF_nu  , STBVF_nu  , STBVF_ne_u, },
+         { STBVF_nw_u, STBVF_nu  , STBVF_nu  , STBVF_nu  , },
+      },{
+         { STBVF_eu  , STBVF_eu  , STBVF_eu  , STBVF_eu  , },
+         { STBVF_su  , STBVF_eu  , STBVF_eu  , STBVF_ne_u, },
+         { STBVF_u   , STBVF_ne_u, STBVF_ne_u, STBVF_ne_u, },
+         { STBVF_nw_u, STBVF_nu  , STBVF_nu  , STBVF_ne_u, },
+      },{
+         { STBVF_eu  , STBVF_eu  , STBVF_eu  , STBVF_eu  , },
+         { STBVF_eu  , STBVF_eu  , STBVF_eu  , STBVF_eu  , },
+         { STBVF_su  , STBVF_eu  , STBVF_eu  , STBVF_ne_u, },
+         { STBVF_u   , STBVF_ne_u, STBVF_ne_u, STBVF_ne_u, },
+      },
+   },{
+      {
+         { STBVF_sw_u, STBVF_u   , STBVF_ne_u, STBVF_ne_u, },
+         { STBVF_wu  , STBVF_nw_u, STBVF_nu  , STBVF_nu  , },
+         { STBVF_wu  , STBVF_nw_u, STBVF_nu  , STBVF_nu  , },
+         { STBVF_nw_u, STBVF_nw_u, STBVF_nw_u, STBVF_nu  , },
+      },{
+         { STBVF_su  , STBVF_su  , STBVF_eu  , STBVF_eu  , },
+         { STBVF_sw_u, STBVF_u   , STBVF_ne_u, STBVF_ne_u, },
+         { STBVF_wu  , STBVF_nw_u, STBVF_nu  , STBVF_nu  , },
+         { STBVF_wu  , STBVF_nw_u, STBVF_nu  , STBVF_nu  , },
+      },{
+         { STBVF_su  , STBVF_eu  , STBVF_eu  , STBVF_eu  , },
+         { STBVF_su  , STBVF_su  , STBVF_eu  , STBVF_eu  , },
+         { STBVF_sw_u, STBVF_u   , STBVF_ne_u, STBVF_ne_u, },
+         { STBVF_wu  , STBVF_nw_u, STBVF_nu  , STBVF_nu  , },
+      },{
+         { STBVF_su  , STBVF_eu  , STBVF_eu  , STBVF_eu  , },
+         { STBVF_su  , STBVF_eu  , STBVF_eu  , STBVF_eu  , },
+         { STBVF_su  , STBVF_su  , STBVF_eu  , STBVF_eu  , },
+         { STBVF_sw_u, STBVF_u   , STBVF_ne_u, STBVF_ne_u, },
+      },
+   },{
+      {
+         { STBVF_sw_u, STBVF_sw_u, STBVF_u   , STBVF_ne_u, },
+         { STBVF_wu  , STBVF_wu  , STBVF_nw_u, STBVF_nu  , },
+         { STBVF_wu  , STBVF_wu  , STBVF_nw_u, STBVF_nu  , },
+         { STBVF_wu  , STBVF_nw_u, STBVF_nw_u, STBVF_nw_u, },
+      },{
+         { STBVF_su  , STBVF_su  , STBVF_su  , STBVF_eu  , },
+         { STBVF_sw_u, STBVF_sw_u, STBVF_u   , STBVF_ne_u, },
+         { STBVF_wu  , STBVF_wu  , STBVF_nw_u, STBVF_nu  , },
+         { STBVF_wu  , STBVF_wu  , STBVF_nw_u, STBVF_nu  , },
+      },{
+         { STBVF_su  , STBVF_su  , STBVF_eu  , STBVF_eu  , },
+         { STBVF_su  , STBVF_su  , STBVF_su  , STBVF_eu  , },
+         { STBVF_sw_u, STBVF_sw_u, STBVF_u   , STBVF_ne_u, },
+         { STBVF_wu  , STBVF_wu  , STBVF_nw_u, STBVF_nu  , },
+      },{
+         { STBVF_su  , STBVF_su  , STBVF_eu  , STBVF_eu  , },
+         { STBVF_su  , STBVF_su  , STBVF_eu  , STBVF_eu  , },
+         { STBVF_su  , STBVF_su  , STBVF_su  , STBVF_eu  , },
+         { STBVF_sw_u, STBVF_sw_u, STBVF_u   , STBVF_ne_u, },
+      },
+   },{
+      {
+         { STBVF_sw_u, STBVF_sw_u, STBVF_sw_u, STBVF_u   , },
+         { STBVF_sw_u, STBVF_wu  , STBVF_wu  , STBVF_nw_u, },
+         { STBVF_wu  , STBVF_wu  , STBVF_wu  , STBVF_nw_u, },
+         { STBVF_wu  , STBVF_wu  , STBVF_nw_u, STBVF_nw_u, },
+      },{
+         { STBVF_sw_u, STBVF_su  , STBVF_su  , STBVF_su  , },
+         { STBVF_sw_u, STBVF_sw_u, STBVF_sw_u, STBVF_u   , },
+         { STBVF_sw_u, STBVF_wu  , STBVF_wu  , STBVF_nw_u, },
+         { STBVF_wu  , STBVF_wu  , STBVF_wu  , STBVF_nw_u, },
+      },{
+         { STBVF_su  , STBVF_su  , STBVF_su  , STBVF_eu  , },
+         { STBVF_sw_u, STBVF_su  , STBVF_su  , STBVF_su  , },
+         { STBVF_sw_u, STBVF_sw_u, STBVF_sw_u, STBVF_u   , },
+         { STBVF_sw_u, STBVF_wu  , STBVF_wu  , STBVF_nw_u, },
+      },{
+         { STBVF_su  , STBVF_su  , STBVF_su  , STBVF_eu  , },
+         { STBVF_su  , STBVF_su  , STBVF_su  , STBVF_eu  , },
+         { STBVF_sw_u, STBVF_su  , STBVF_su  , STBVF_su  , },
+         { STBVF_sw_u, STBVF_sw_u, STBVF_sw_u, STBVF_u   , },
+      },
+   },
+};
+#else
+// which normal to use for a given vheight that's planar
+// @TODO: this table was constructed by hand and may have bugs
+//                                 nw se sw
+static unsigned char stbvox_planar_face_up_normal[4][4][4] =
+{
+   {                                                      // sw,se,nw,ne;  ne = se+nw-sw
+      { STBVF_u   , 0         , 0         , 0          }, //  0,0,0,0; 1,0,0,-1; 2,0,0,-2; 3,0,0,-3;
+      { STBVF_u   , STBVF_u   , 0         , 0          }, //  0,1,0,1; 1,1,0, 0; 2,1,0,-1; 3,1,0,-2;
+      { STBVF_wu  , STBVF_nw_u, STBVF_nu  , 0          }, //  0,2,0,2; 1,2,0, 1; 2,2,0, 0; 3,2,0,-1;
+      { STBVF_wu  , STBVF_nw_u, STBVF_nw_u, STBVF_nu   }, //  0,3,0,3; 1,3,0, 2; 2,3,0, 1; 3,3,0, 0;
+   },{
+      { STBVF_u   , STBVF_u   , 0         , 0          }, //  0,0,1,1; 1,0,1, 0; 2,0,1,-1; 3,0,1,-2;
+      { STBVF_sw_u, STBVF_u   , STBVF_ne_u, 0          }, //  0,1,1,2; 1,1,1, 1; 2,1,1, 0; 3,1,1,-1;
+      { STBVF_sw_u, STBVF_u   , STBVF_u   , STBVF_ne_u }, //  0,2,1,3; 1,2,1, 2; 2,2,1, 1; 3,2,1, 0;
+      { 0         , STBVF_wu  , STBVF_nw_u, STBVF_nu   }, //  0,3,1,4; 1,3,1, 3; 2,3,1, 2; 3,3,1, 1;
+   },{
+      { STBVF_su  , STBVF_se_u, STBVF_eu  , 0          }, //  0,0,2,2; 1,0,2, 1; 2,0,2, 0; 3,0,2,-1;
+      { STBVF_sw_u, STBVF_u   , STBVF_u   , STBVF_ne_u }, //  0,1,2,3; 1,1,2, 2; 2,1,2, 1; 3,1,2, 0;
+      { 0         , STBVF_sw_u, STBVF_u   , STBVF_ne_u }, //  0,2,2,4; 1,2,2, 3; 2,2,2, 2; 3,2,2, 1;
+      { 0         , 0         , STBVF_u   , STBVF_u    }, //  0,3,2,5; 1,3,2, 4; 2,3,2, 3; 3,3,2, 2;
+   },{
+      { STBVF_su  , STBVF_se_u, STBVF_se_u, STBVF_eu   }, //  0,0,3,3; 1,0,3, 2; 2,0,3, 1; 3,0,3, 0;
+      { 0         , STBVF_su  , STBVF_se_u, STBVF_eu   }, //  0,1,3,4; 1,1,3, 3; 2,1,3, 2; 3,1,3, 1;
+      { 0         , 0         , STBVF_u   , STBVF_u    }, //  0,2,3,5; 1,2,3, 4; 2,2,3, 3; 3,2,3, 2;
+      { 0         , 0         , 0         , STBVF_u    }, //  0,3,3,6; 1,3,3, 5; 2,3,3, 4; 3,3,3, 3;
+   }
+};
+
+// these tables were constructed automatically using a variant of the code
+// below; however, they seem wrong, so who knows
+static unsigned char stbvox_face_up_normal_012[4][4][4] =
+{
+   {
+      { STBVF_u   , STBVF_ne_u, STBVF_ne_u, STBVF_ne_u, },
+      { STBVF_wu  , STBVF_nu  , STBVF_ne_u, STBVF_ne_u, },
+      { STBVF_wu  , STBVF_nw_u, STBVF_nu  , STBVF_ne_u, },
+      { STBVF_wu  , STBVF_nw_u, STBVF_nw_u, STBVF_nu  , },
+   },{
+      { STBVF_su  , STBVF_eu  , STBVF_ne_u, STBVF_ne_u, },
+      { STBVF_sw_u, STBVF_u   , STBVF_ne_u, STBVF_ne_u, },
+      { STBVF_sw_u, STBVF_wu  , STBVF_nu  , STBVF_ne_u, },
+      { STBVF_sw_u, STBVF_wu  , STBVF_nw_u, STBVF_nu  , },
+   },{
+      { STBVF_su  , STBVF_eu  , STBVF_eu  , STBVF_ne_u, },
+      { STBVF_sw_u, STBVF_su  , STBVF_eu  , STBVF_ne_u, },
+      { STBVF_sw_u, STBVF_sw_u, STBVF_u   , STBVF_ne_u, },
+      { STBVF_sw_u, STBVF_sw_u, STBVF_wu  , STBVF_nu  , },
+   },{
+      { STBVF_su  , STBVF_su  , STBVF_eu  , STBVF_eu  , },
+      { STBVF_sw_u, STBVF_su  , STBVF_eu  , STBVF_eu  , },
+      { STBVF_sw_u, STBVF_sw_u, STBVF_su  , STBVF_eu  , },
+      { STBVF_sw_u, STBVF_sw_u, STBVF_sw_u, STBVF_u   , },
+   }
+};
+
+static unsigned char stbvox_face_up_normal_013[4][4][4] =
+{
+   {
+      { STBVF_u   , STBVF_eu  , STBVF_eu  , STBVF_eu  , },
+      { STBVF_nw_u, STBVF_nu  , STBVF_ne_u, STBVF_ne_u, },
+      { STBVF_nw_u, STBVF_nw_u, STBVF_nu  , STBVF_ne_u, },
+      { STBVF_nw_u, STBVF_nw_u, STBVF_nw_u, STBVF_nu  , },
+   },{
+      { STBVF_su  , STBVF_eu  , STBVF_eu  , STBVF_eu  , },
+      { STBVF_wu  , STBVF_u   , STBVF_eu  , STBVF_eu  , },
+      { STBVF_nw_u, STBVF_nw_u, STBVF_nu  , STBVF_ne_u, },
+      { STBVF_nw_u, STBVF_nw_u, STBVF_nw_u, STBVF_nu  , },
+   },{
+      { STBVF_su  , STBVF_su  , STBVF_su  , STBVF_eu  , },
+      { STBVF_sw_u, STBVF_su  , STBVF_eu  , STBVF_eu  , },
+      { STBVF_wu  , STBVF_wu  , STBVF_u   , STBVF_eu  , },
+      { STBVF_nw_u, STBVF_nw_u, STBVF_nw_u, STBVF_nu  , },
+   },{
+      { STBVF_su  , STBVF_su  , STBVF_su  , STBVF_eu  , },
+      { STBVF_sw_u, STBVF_su  , STBVF_su  , STBVF_su  , },
+      { STBVF_sw_u, STBVF_sw_u, STBVF_su  , STBVF_eu  , },
+      { STBVF_wu  , STBVF_wu  , STBVF_wu  , STBVF_u   , },
+   }
+};
+
+static unsigned char stbvox_face_up_normal_023[4][4][4] =
+{
+   {
+      { STBVF_u   , STBVF_nu  , STBVF_nu  , STBVF_nu  , },
+      { STBVF_eu  , STBVF_eu  , STBVF_ne_u, STBVF_ne_u, },
+      { STBVF_su  , STBVF_eu  , STBVF_eu  , STBVF_ne_u, },
+      { STBVF_eu  , STBVF_eu  , STBVF_eu  , STBVF_eu  , },
+   },{
+      { STBVF_wu  , STBVF_nw_u, STBVF_nw_u, STBVF_nw_u, },
+      { STBVF_su  , STBVF_u   , STBVF_nu  , STBVF_nu  , },
+      { STBVF_su  , STBVF_eu  , STBVF_eu  , STBVF_ne_u, },
+      { STBVF_su  , STBVF_su  , STBVF_eu  , STBVF_eu  , },
+   },{
+      { STBVF_wu  , STBVF_nw_u, STBVF_nw_u, STBVF_nw_u, },
+      { STBVF_sw_u, STBVF_wu  , STBVF_nw_u, STBVF_nw_u, },
+      { STBVF_su  , STBVF_su  , STBVF_u   , STBVF_nu  , },
+      { STBVF_su  , STBVF_su  , STBVF_eu  , STBVF_eu  , },
+   },{
+      { STBVF_wu  , STBVF_nw_u, STBVF_nw_u, STBVF_nw_u, },
+      { STBVF_sw_u, STBVF_wu  , STBVF_nw_u, STBVF_nw_u, },
+      { STBVF_sw_u, STBVF_sw_u, STBVF_wu  , STBVF_nw_u, },
+      { STBVF_su  , STBVF_su  , STBVF_su  , STBVF_u   , },
+   }
+};
+
+static unsigned char stbvox_face_up_normal_123[4][4][4] =
+{
+   {
+      { STBVF_u   , STBVF_nu  , STBVF_nu  , STBVF_nu  , },
+      { STBVF_eu  , STBVF_ne_u, STBVF_ne_u, STBVF_ne_u, },
+      { STBVF_eu  , STBVF_ne_u, STBVF_ne_u, STBVF_ne_u, },
+      { STBVF_eu  , STBVF_ne_u, STBVF_ne_u, STBVF_ne_u, },
+   },{
+      { STBVF_sw_u, STBVF_wu  , STBVF_nw_u, STBVF_nw_u, },
+      { STBVF_su  , STBVF_u   , STBVF_nu  , STBVF_nu  , },
+      { STBVF_eu  , STBVF_eu  , STBVF_ne_u, STBVF_ne_u, },
+      { STBVF_eu  , STBVF_eu  , STBVF_ne_u, STBVF_ne_u, },
+   },{
+      { STBVF_sw_u, STBVF_sw_u, STBVF_wu  , STBVF_nw_u, },
+      { STBVF_sw_u, STBVF_sw_u, STBVF_wu  , STBVF_nw_u, },
+      { STBVF_su  , STBVF_su  , STBVF_u   , STBVF_nu  , },
+      { STBVF_su  , STBVF_eu  , STBVF_eu  , STBVF_ne_u, },
+   },{
+      { STBVF_sw_u, STBVF_sw_u, STBVF_sw_u, STBVF_wu  , },
+      { STBVF_sw_u, STBVF_sw_u, STBVF_sw_u, STBVF_wu  , },
+      { STBVF_sw_u, STBVF_sw_u, STBVF_sw_u, STBVF_wu  , },
+      { STBVF_su  , STBVF_su  , STBVF_su  , STBVF_u   , },
+   }
+};
+#endif
+
+void stbvox_get_quad_vertex_pointer(stbvox_mesh_maker *mm, int mesh, stbvox_mesh_vertex **vertices, stbvox_mesh_face face)
+{
+   char *p = mm->output_cur[mesh][0];
+   int step = mm->output_step[mesh][0];
+
+   // allocate a new quad from the mesh
+   vertices[0] = (stbvox_mesh_vertex *) p; p += step;
+   vertices[1] = (stbvox_mesh_vertex *) p; p += step;
+   vertices[2] = (stbvox_mesh_vertex *) p; p += step;
+   vertices[3] = (stbvox_mesh_vertex *) p; p += step;
+   mm->output_cur[mesh][0] = p;
+
+   // output the face
+   #ifdef STBVOX_ICONFIG_FACE_ATTRIBUTE
+      // write face as interleaved vertex data
+      *(stbvox_mesh_face *) (vertices[0]+1) = face;
+      *(stbvox_mesh_face *) (vertices[1]+1) = face;
+      *(stbvox_mesh_face *) (vertices[2]+1) = face;
+      *(stbvox_mesh_face *) (vertices[3]+1) = face;
+   #else
+      *(stbvox_mesh_face *) mm->output_cur[mesh][1] = face;
+      mm->output_cur[mesh][1] += 4;
+   #endif
+}
+
+void stbvox_make_mesh_for_face(stbvox_mesh_maker *mm, stbvox_rotate rot, int face, int v_off, stbvox_pos pos, stbvox_mesh_vertex vertbase, stbvox_mesh_vertex *face_coord, unsigned char mesh, int normal)
+{
+   stbvox_mesh_face face_data = stbvox_compute_mesh_face_value(mm,rot,face,v_off, normal);
+
+   // still need to compute ao & texlerp for each vertex
+
+   // first compute texlerp into p1
+   stbvox_mesh_vertex p1[4] = { 0 };
+
+   #if defined(STBVOX_CONFIG_DOWN_TEXLERP_PACKED) && defined(STBVOX_CONFIG_UP_TEXLERP_PACKED)
+      #define STBVOX_USE_PACKED(f) ((f) == STBVOX_FACE_up || (f) == STBVOX_FACE_down)
+   #elif defined(STBVOX_CONFIG_UP_TEXLERP_PACKED)
+      #define STBVOX_USE_PACKED(f) ((f) == STBVOX_FACE_up                           )
+   #elif defined(STBVOX_CONFIG_DOWN_TEXLERP_PACKED)
+      #define STBVOX_USE_PACKED(f) (                         (f) == STBVOX_FACE_down)
+   #endif
+
+   #if defined(STBVOX_CONFIG_DOWN_TEXLERP_PACKED) || defined(STBVOX_CONFIG_UP_TEXLERP_PACKED)
+   if (STBVOX_USE_PACKED(face)) {
+      if (!mm->input.packed_compact || 0==(mm->input.packed_compact[v_off]&16))
+         goto set_default;
+      p1[0] = (mm->input.packed_compact[v_off + mm->cube_vertex_offset[face][0]] >> 5);
+      p1[1] = (mm->input.packed_compact[v_off + mm->cube_vertex_offset[face][1]] >> 5);
+      p1[2] = (mm->input.packed_compact[v_off + mm->cube_vertex_offset[face][2]] >> 5);
+      p1[3] = (mm->input.packed_compact[v_off + mm->cube_vertex_offset[face][3]] >> 5);
+      p1[0] = stbvox_vertex_encode(0,0,0,0,p1[0]);
+      p1[1] = stbvox_vertex_encode(0,0,0,0,p1[1]);
+      p1[2] = stbvox_vertex_encode(0,0,0,0,p1[2]);
+      p1[3] = stbvox_vertex_encode(0,0,0,0,p1[3]);
+      goto skip;
+   }
+   #endif
+
+   if (mm->input.block_texlerp) {
+      stbvox_block_type bt = mm->input.blocktype[v_off];
+      unsigned char val = mm->input.block_texlerp[bt];
+      p1[0] = p1[1] = p1[2] = p1[3] = stbvox_vertex_encode(0,0,0,0,val);
+   } else if (mm->input.block_texlerp_face) {
+      stbvox_block_type bt = mm->input.blocktype[v_off];
+      unsigned char bt_face = STBVOX_ROTATE(face, rot.block);
+      unsigned char val = mm->input.block_texlerp_face[bt][bt_face];
+      p1[0] = p1[1] = p1[2] = p1[3] = stbvox_vertex_encode(0,0,0,0,val);
+   } else if (mm->input.texlerp_face3) {
+      unsigned char val = (mm->input.texlerp_face3[v_off] >> stbvox_face3_lerp[face]) & 7;
+      if (face >= STBVOX_FACE_up)
+         val = stbvox_face3_updown[val];
+      p1[0] = p1[1] = p1[2] = p1[3] = stbvox_vertex_encode(0,0,0,0,val);
+   } else if (mm->input.texlerp_simple) {
+      unsigned char val = mm->input.texlerp_simple[v_off];
+      unsigned char lerp_face = (val >> 2) & 7;
+      if (lerp_face == face) {
+         p1[0] = (mm->input.texlerp_simple[v_off + mm->cube_vertex_offset[face][0]] >> 5) & 7;
+         p1[1] = (mm->input.texlerp_simple[v_off + mm->cube_vertex_offset[face][1]] >> 5) & 7;
+         p1[2] = (mm->input.texlerp_simple[v_off + mm->cube_vertex_offset[face][2]] >> 5) & 7;
+         p1[3] = (mm->input.texlerp_simple[v_off + mm->cube_vertex_offset[face][3]] >> 5) & 7;
+         p1[0] = stbvox_vertex_encode(0,0,0,0,p1[0]);
+         p1[1] = stbvox_vertex_encode(0,0,0,0,p1[1]);
+         p1[2] = stbvox_vertex_encode(0,0,0,0,p1[2]);
+         p1[3] = stbvox_vertex_encode(0,0,0,0,p1[3]);
+      } else {
+         unsigned char base = stbvox_vert_lerp_for_simple[val&3];
+         p1[0] = p1[1] = p1[2] = p1[3] = stbvox_vertex_encode(0,0,0,0,base);
+      }
+   } else if (mm->input.texlerp) {
+      unsigned char facelerp = (mm->input.texlerp[v_off] >> stbvox_face_lerp[face]) & 3;
+      if (facelerp == STBVOX_TEXLERP_FACE_use_vert) {
+         if (mm->input.texlerp_vert3 && face != STBVOX_FACE_down) {
+            unsigned char shift = stbvox_vert3_lerp[face];
+            p1[0] = (mm->input.texlerp_vert3[mm->cube_vertex_offset[face][0]] >> shift) & 7;
+            p1[1] = (mm->input.texlerp_vert3[mm->cube_vertex_offset[face][1]] >> shift) & 7;
+            p1[2] = (mm->input.texlerp_vert3[mm->cube_vertex_offset[face][2]] >> shift) & 7;
+            p1[3] = (mm->input.texlerp_vert3[mm->cube_vertex_offset[face][3]] >> shift) & 7;
+         } else {
+            p1[0] = stbvox_vert_lerp_for_simple[mm->input.texlerp[mm->cube_vertex_offset[face][0]]>>6];
+            p1[1] = stbvox_vert_lerp_for_simple[mm->input.texlerp[mm->cube_vertex_offset[face][1]]>>6];
+            p1[2] = stbvox_vert_lerp_for_simple[mm->input.texlerp[mm->cube_vertex_offset[face][2]]>>6];
+            p1[3] = stbvox_vert_lerp_for_simple[mm->input.texlerp[mm->cube_vertex_offset[face][3]]>>6];
+         }
+         p1[0] = stbvox_vertex_encode(0,0,0,0,p1[0]);
+         p1[1] = stbvox_vertex_encode(0,0,0,0,p1[1]);
+         p1[2] = stbvox_vertex_encode(0,0,0,0,p1[2]);
+         p1[3] = stbvox_vertex_encode(0,0,0,0,p1[3]);
+      } else {
+         p1[0] = p1[1] = p1[2] = p1[3] = stbvox_vertex_encode(0,0,0,0,stbvox_vert_lerp_for_face_lerp[facelerp]);
+      }
+   } else {
+      #if defined(STBVOX_CONFIG_UP_TEXLERP_PACKED) || defined(STBVOX_CONFIG_DOWN_TEXLERP_PACKED)
+      set_default:
+      #endif
+      p1[0] = p1[1] = p1[2] = p1[3] = stbvox_vertex_encode(0,0,0,0,7); // @TODO make this configurable
+   }
+
+   #if defined(STBVOX_CONFIG_UP_TEXLERP_PACKED) || defined(STBVOX_CONFIG_DOWN_TEXLERP_PACKED)
+   skip:
+   #endif
+
+   // now compute lighting and store to vertices
+   {
+      stbvox_mesh_vertex *mv[4];
+      stbvox_get_quad_vertex_pointer(mm, mesh, mv, face_data);
+
+      if (mm->input.lighting) {
+         // @TODO: lighting at block centers, but not gathered, instead constant-per-face
+         if (mm->input.lighting_at_vertices) {
+            int i;
+            for (i=0; i < 4; ++i) {
+               *mv[i] = vertbase + face_coord[i]
+                          + stbvox_vertex_encode(0,0,0,mm->input.lighting[v_off + mm->cube_vertex_offset[face][i]] & 63,0)
+                          + p1[i];
+            }
+         } else {
+            unsigned char *amb = &mm->input.lighting[v_off];
+            int i,j;
+            #if defined(STBVOX_CONFIG_ROTATION_IN_LIGHTING) || defined(STBVOX_CONFIG_VHEIGHT_IN_LIGHTING)
+            #define STBVOX_GET_LIGHTING(light) ((light) & ~3)
+            #define STBVOX_LIGHTING_ROUNDOFF   8
+            #else
+            #define STBVOX_GET_LIGHTING(light) (light)
+            #define STBVOX_LIGHTING_ROUNDOFF   2
+            #endif
+
+            for (i=0; i < 4; ++i) {
+               // for each vertex, gather from the four neighbor blocks it's facing
+               unsigned char *vamb = &amb[mm->cube_vertex_offset[face][i]];
+               int total=0;
+               for (j=0; j < 4; ++j)
+                  total += STBVOX_GET_LIGHTING(vamb[mm->vertex_gather_offset[face][j]]);
+               *mv[i] = vertbase + face_coord[i]
+                          + stbvox_vertex_encode(0,0,0,(total+STBVOX_LIGHTING_ROUNDOFF)>>4,0)
+                          + p1[i];
+                          // >> 4 is because:
+                          //   >> 2 to divide by 4 to get average over 4 samples
+                          //   >> 2 because input is 8 bits, output is 6 bits
+            }
+
+            // @TODO: note that gathering baked *lighting*
+            // is different from gathering baked ao; baked ao can count
+            // solid blocks as 0 ao, but baked lighting wants average
+            // of non-blocked--not take average & treat blocked as 0. And
+            // we can't bake the right value into the solid blocks
+            // because they can have different lighting values on
+            // different sides. So we need to actually gather and
+            // then divide by 0..4 (which we can do with a table-driven
+            // multiply, or have an 'if' for the 3 case)
+
+         }
+      } else {
+         vertbase += stbvox_vertex_encode(0,0,0,63,0);
+         *mv[0] = vertbase + face_coord[0] + p1[0];
+         *mv[1] = vertbase + face_coord[1] + p1[1];
+         *mv[2] = vertbase + face_coord[2] + p1[2];
+         *mv[3] = vertbase + face_coord[3] + p1[3];
+      }
+   }
+}
+
+// get opposite-facing normal & texgen for opposite face, used to map up-facing vheight data to down-facing data
+static unsigned char stbvox_reverse_face[STBVF_count] =
+{
+   STBVF_w, STBVF_s, STBVF_e, STBVF_n, STBVF_d   , STBVF_u   , STBVF_wd, STBVF_wu,
+         0,       0,       0,       0, STBVF_sw_d, STBVF_sw_u, STBVF_sd, STBVF_su,
+         0,       0,       0,       0, STBVF_se_d, STBVF_se_u, STBVF_ed, STBVF_eu,
+         0,       0,       0,       0, STBVF_ne_d, STBVF_ne_d, STBVF_nd, STBVF_nu
+};
+
+#ifndef STBVOX_CONFIG_OPTIMIZED_VHEIGHT
+// render non-planar quads by splitting into two triangles, rendering each as a degenerate quad
+static void stbvox_make_12_split_mesh_for_face(stbvox_mesh_maker *mm, stbvox_rotate rot, int face, int v_off, stbvox_pos pos, stbvox_mesh_vertex vertbase, stbvox_mesh_vertex *face_coord, unsigned char mesh, unsigned char *ht)
+{
+   stbvox_mesh_vertex v[4];
+
+   unsigned char normal1 = stbvox_face_up_normal_012[ht[2]][ht[1]][ht[0]];
+   unsigned char normal2 = stbvox_face_up_normal_123[ht[3]][ht[2]][ht[1]];
+
+   if (face == STBVOX_FACE_down) {
+      normal1 = stbvox_reverse_face[normal1];
+      normal2 = stbvox_reverse_face[normal2];
+   }
+
+   // the floor side face_coord is stored in order NW,NE,SE,SW, but ht[] is stored SW,SE,NW,NE
+   v[0] = face_coord[2];
+   v[1] = face_coord[3];
+   v[2] = face_coord[0];
+   v[3] = face_coord[2];
+   stbvox_make_mesh_for_face(mm, rot, face, v_off, pos, vertbase, v, mesh, normal1);
+   v[1] = face_coord[0];
+   v[2] = face_coord[1];
+   stbvox_make_mesh_for_face(mm, rot, face, v_off, pos, vertbase, v, mesh, normal2);
+}
+
+static void stbvox_make_03_split_mesh_for_face(stbvox_mesh_maker *mm, stbvox_rotate rot, int face, int v_off, stbvox_pos pos, stbvox_mesh_vertex vertbase, stbvox_mesh_vertex *face_coord, unsigned char mesh, unsigned char *ht)
+{
+   stbvox_mesh_vertex v[4];
+
+   unsigned char normal1 = stbvox_face_up_normal_013[ht[3]][ht[1]][ht[0]];
+   unsigned char normal2 = stbvox_face_up_normal_023[ht[3]][ht[2]][ht[0]];
+
+   if (face == STBVOX_FACE_down) {
+      normal1 = stbvox_reverse_face[normal1];
+      normal2 = stbvox_reverse_face[normal2];
+   }
+
+   v[0] = face_coord[1];
+   v[1] = face_coord[2];
+   v[2] = face_coord[3];
+   v[3] = face_coord[1];
+   stbvox_make_mesh_for_face(mm, rot, face, v_off, pos, vertbase, v, mesh, normal1);
+   v[1] = face_coord[3];
+   v[2] = face_coord[0];
+   stbvox_make_mesh_for_face(mm, rot, face, v_off, pos, vertbase, v, mesh, normal2);  // this one is correct!
+}
+#endif
+
+#ifndef STBVOX_CONFIG_PRECISION_Z
+#define STBVOX_CONFIG_PRECISION_Z 1
+#endif
+
+// simple case for mesh generation: we have only solid and empty blocks
+static void stbvox_make_mesh_for_block(stbvox_mesh_maker *mm, stbvox_pos pos, int v_off, stbvox_mesh_vertex *vmesh)
+{
+   int ns_off = mm->y_stride_in_bytes;
+   int ew_off = mm->x_stride_in_bytes;
+
+   unsigned char *blockptr = &mm->input.blocktype[v_off];
+   stbvox_mesh_vertex basevert = stbvox_vertex_encode(pos.x, pos.y, pos.z << STBVOX_CONFIG_PRECISION_Z , 0,0);
+
+   stbvox_rotate rot = { 0,0,0,0 };
+   unsigned char simple_rot = 0;
+
+   unsigned char mesh = mm->default_mesh;
+
+   if (mm->input.selector)
+      mesh = mm->input.selector[v_off];
+   else if (mm->input.block_selector)
+      mesh = mm->input.block_selector[mm->input.blocktype[v_off]];
+
+   // check if we're going off the end
+   if (mm->output_cur[mesh][0] + mm->output_size[mesh][0]*6 > mm->output_end[mesh][0]) {
+      mm->full = 1;
+      return;
+   }
+
+   #ifdef STBVOX_CONFIG_ROTATION_IN_LIGHTING
+   simple_rot = mm->input.lighting[v_off] & 3;
+   #endif
+
+   if (mm->input.packed_compact)
+      simple_rot = mm->input.packed_compact[v_off] & 3;
+
+   if (blockptr[ 1]==0) {
+      rot.facerot = simple_rot;
+      stbvox_make_mesh_for_face(mm, rot, STBVOX_FACE_up  , v_off, pos, basevert, vmesh+4*STBVOX_FACE_up, mesh, STBVOX_FACE_up);
+   }
+   if (blockptr[-1]==0) {
+      rot.facerot = (-simple_rot) & 3;
+      stbvox_make_mesh_for_face(mm, rot, STBVOX_FACE_down, v_off, pos, basevert, vmesh+4*STBVOX_FACE_down, mesh, STBVOX_FACE_down);
+   }
+
+   if (mm->input.rotate) {
+      unsigned char val = mm->input.rotate[v_off];
+      rot.block   = (val >> 0) & 3;
+      rot.overlay = (val >> 2) & 3;
+      //rot.tex2    = (val >> 4) & 3;
+      rot.ecolor  = (val >> 6) & 3;
+   } else {
+      rot.block = rot.overlay = rot.ecolor = simple_rot;
+   }
+   rot.facerot = 0;
+
+   if (blockptr[ ns_off]==0)
+      stbvox_make_mesh_for_face(mm, rot, STBVOX_FACE_north, v_off, pos, basevert, vmesh+4*STBVOX_FACE_north, mesh, STBVOX_FACE_north);
+   if (blockptr[-ns_off]==0)
+      stbvox_make_mesh_for_face(mm, rot, STBVOX_FACE_south, v_off, pos, basevert, vmesh+4*STBVOX_FACE_south, mesh, STBVOX_FACE_south);
+   if (blockptr[ ew_off]==0)
+      stbvox_make_mesh_for_face(mm, rot, STBVOX_FACE_east , v_off, pos, basevert, vmesh+4*STBVOX_FACE_east, mesh, STBVOX_FACE_east);
+   if (blockptr[-ew_off]==0)
+      stbvox_make_mesh_for_face(mm, rot, STBVOX_FACE_west , v_off, pos, basevert, vmesh+4*STBVOX_FACE_west, mesh, STBVOX_FACE_west);
+}
+
+// complex case for mesh generation: we have lots of different
+// block types, and we don't want to generate faces of blocks
+// if they're hidden by neighbors.
+//
+// we use lots of tables to determine this: we have a table
+// which tells us what face type is generated for each type of
+// geometry, and then a table that tells us whether that type
+// is hidden by a neighbor.
+static void stbvox_make_mesh_for_block_with_geo(stbvox_mesh_maker *mm, stbvox_pos pos, int v_off)
+{
+   int ns_off = mm->y_stride_in_bytes;
+   int ew_off = mm->x_stride_in_bytes;
+   int visible_faces, visible_base;
+   unsigned char mesh;
+
+   // first gather the geometry info for this block and all neighbors
+
+   unsigned char bt, nbt[6];
+   unsigned char geo, ngeo[6];
+   unsigned char rot, nrot[6];
+
+   bt = mm->input.blocktype[v_off];
+   nbt[0] = mm->input.blocktype[v_off + ew_off];
+   nbt[1] = mm->input.blocktype[v_off + ns_off];
+   nbt[2] = mm->input.blocktype[v_off - ew_off];
+   nbt[3] = mm->input.blocktype[v_off - ns_off];
+   nbt[4] = mm->input.blocktype[v_off +      1];
+   nbt[5] = mm->input.blocktype[v_off -      1];
+   if (mm->input.geometry) {
+      int i;
+      geo = mm->input.geometry[v_off];
+      ngeo[0] = mm->input.geometry[v_off + ew_off];
+      ngeo[1] = mm->input.geometry[v_off + ns_off];
+      ngeo[2] = mm->input.geometry[v_off - ew_off];
+      ngeo[3] = mm->input.geometry[v_off - ns_off];
+      ngeo[4] = mm->input.geometry[v_off +      1];
+      ngeo[5] = mm->input.geometry[v_off -      1];
+
+      rot = (geo >> 4) & 3;
+      geo &= 15;
+      for (i=0; i < 6; ++i) {
+         nrot[i] = (ngeo[i] >> 4) & 3;
+         ngeo[i] &= 15;
+      }
+   } else {
+      int i;
+      assert(mm->input.block_geometry);
+      geo = mm->input.block_geometry[bt];
+      for (i=0; i < 6; ++i)
+         ngeo[i] = mm->input.block_geometry[nbt[i]];
+      if (mm->input.selector) {
+         #ifndef STBVOX_CONFIG_ROTATION_IN_LIGHTING
+         if (mm->input.packed_compact == NULL) {
+            rot     = (mm->input.selector[v_off         ] >> 4) & 3;
+            nrot[0] = (mm->input.selector[v_off + ew_off] >> 4) & 3;
+            nrot[1] = (mm->input.selector[v_off + ns_off] >> 4) & 3;
+            nrot[2] = (mm->input.selector[v_off - ew_off] >> 4) & 3;
+            nrot[3] = (mm->input.selector[v_off - ns_off] >> 4) & 3;
+            nrot[4] = (mm->input.selector[v_off +      1] >> 4) & 3;
+            nrot[5] = (mm->input.selector[v_off -      1] >> 4) & 3;
+         }
+         #endif
+      } else {
+         #ifndef STBVOX_CONFIG_ROTATION_IN_LIGHTING
+         if (mm->input.packed_compact == NULL) {
+            rot = (geo>>4)&3;
+            geo &= 15;
+            for (i=0; i < 6; ++i) {
+               nrot[i] = (ngeo[i]>>4)&3;
+               ngeo[i] &= 15;
+            }
+         }
+         #endif
+      }
+   }
+
+   #ifndef STBVOX_CONFIG_ROTATION_IN_LIGHTING
+   if (mm->input.packed_compact) {
+      rot = mm->input.packed_compact[rot] & 3;
+      nrot[0] = mm->input.packed_compact[v_off + ew_off] & 3;
+      nrot[1] = mm->input.packed_compact[v_off + ns_off] & 3;
+      nrot[2] = mm->input.packed_compact[v_off - ew_off] & 3;
+      nrot[3] = mm->input.packed_compact[v_off - ns_off] & 3;
+      nrot[4] = mm->input.packed_compact[v_off +      1] & 3;
+      nrot[5] = mm->input.packed_compact[v_off -      1] & 3;
+   }
+   #else
+   rot = mm->input.lighting[v_off] & 3;
+   nrot[0] = (mm->input.lighting[v_off + ew_off]) & 3;
+   nrot[1] = (mm->input.lighting[v_off + ns_off]) & 3;
+   nrot[2] = (mm->input.lighting[v_off - ew_off]) & 3;
+   nrot[3] = (mm->input.lighting[v_off - ns_off]) & 3;
+   nrot[4] = (mm->input.lighting[v_off +      1]) & 3;
+   nrot[5] = (mm->input.lighting[v_off -      1]) & 3;
+   #endif
+
+   if (geo == STBVOX_GEOM_transp) {
+      // transparency has a special rule: if the blocktype is the same,
+      // and the faces are compatible, then can hide them; otherwise,
+      // force them on
+      // Note that this means we don't support any transparentshapes other
+      // than solid blocks, since detecting them is too complicated. If
+      // you wanted to do something like minecraft water, you probably
+      // should just do that with a separate renderer anyway. (We don't
+      // support transparency sorting so you need to use alpha test
+      // anyway)
+      int i;
+      for (i=0; i < 6; ++i)
+         if (nbt[i] != bt) {
+            nbt[i] = 0;
+            ngeo[i] = STBVOX_GEOM_empty;
+         } else
+            ngeo[i] = STBVOX_GEOM_solid;
+      geo = STBVOX_GEOM_solid;
+   }
+
+   // now compute the face visibility
+   visible_base = stbvox_hasface[geo][rot];
+   // @TODO: assert(visible_base != 0); // we should have early-outted earlier in this case
+   visible_faces = 0;
+
+   // now, for every face that might be visible, check if neighbor hides it
+   if (visible_base & (1 << STBVOX_FACE_east)) {
+      int  type = stbvox_facetype[ geo   ][(STBVOX_FACE_east+ rot   )&3];
+      int ntype = stbvox_facetype[ngeo[0]][(STBVOX_FACE_west+nrot[0])&3];
+      visible_faces |= ((stbvox_face_visible[type]) >> (ntype + 5 - STBVOX_FACE_east)) & (1 << STBVOX_FACE_east);
+   }
+   if (visible_base & (1 << STBVOX_FACE_north)) {
+      int  type = stbvox_facetype[ geo   ][(STBVOX_FACE_north+ rot   )&3];
+      int ntype = stbvox_facetype[ngeo[1]][(STBVOX_FACE_south+nrot[1])&3];
+      visible_faces |= ((stbvox_face_visible[type]) >> (ntype + 5 - STBVOX_FACE_north)) & (1 << STBVOX_FACE_north);
+   }
+   if (visible_base & (1 << STBVOX_FACE_west)) {
+      int  type = stbvox_facetype[ geo   ][(STBVOX_FACE_west+ rot   )&3];
+      int ntype = stbvox_facetype[ngeo[2]][(STBVOX_FACE_east+nrot[2])&3];
+      visible_faces |= ((stbvox_face_visible[type]) >> (ntype + 5 - STBVOX_FACE_west)) & (1 << STBVOX_FACE_west);
+   }
+   if (visible_base & (1 << STBVOX_FACE_south)) {
+      int  type = stbvox_facetype[ geo   ][(STBVOX_FACE_south+ rot   )&3];
+      int ntype = stbvox_facetype[ngeo[3]][(STBVOX_FACE_north+nrot[3])&3];
+      visible_faces |= ((stbvox_face_visible[type]) >> (ntype + 5 - STBVOX_FACE_south)) & (1 << STBVOX_FACE_south);
+   }
+   if (visible_base & (1 << STBVOX_FACE_up)) {
+      int  type = stbvox_facetype[ geo   ][STBVOX_FACE_up];
+      int ntype = stbvox_facetype[ngeo[4]][STBVOX_FACE_down];
+      visible_faces |= ((stbvox_face_visible[type]) >> (ntype + 5 - STBVOX_FACE_up)) & (1 << STBVOX_FACE_up);
+   }
+   if (visible_base & (1 << STBVOX_FACE_down)) {
+      int  type = stbvox_facetype[ geo   ][STBVOX_FACE_down];
+      int ntype = stbvox_facetype[ngeo[5]][STBVOX_FACE_up];
+      visible_faces |= ((stbvox_face_visible[type]) >> (ntype + 5 - STBVOX_FACE_down)) & (1 << STBVOX_FACE_down);
+   }
+
+   if (geo == STBVOX_GEOM_force)
+      geo = STBVOX_GEOM_solid;
+
+   assert((geo == STBVOX_GEOM_crossed_pair) ? (visible_faces == 15) : 1);
+
+   // now we finally know for sure which faces are getting generated
+   if (visible_faces == 0)
+      return;
+
+   mesh = mm->default_mesh;
+   if (mm->input.selector)
+      mesh = mm->input.selector[v_off];
+   else if (mm->input.block_selector)
+      mesh = mm->input.block_selector[bt];
+
+   if (geo <= STBVOX_GEOM_ceil_slope_north_is_bottom) {
+      // this is the simple case, we can just use regular block gen with special vmesh calculated with vheight
+      stbvox_mesh_vertex basevert;
+      stbvox_mesh_vertex vmesh[6][4];
+      stbvox_rotate rotate = { 0,0,0,0 };
+      unsigned char simple_rot = rot;
+      int i;
+      // we only need to do this for the displayed faces, but it's easier
+      // to just do it up front; @OPTIMIZE check if it's faster to do it
+      // for visible faces only
+      for (i=0; i < 6*4; ++i) {
+         int vert = stbvox_vertex_selector[0][i];
+         vert = stbvox_rotate_vertex[vert][rot];
+         vmesh[0][i] = stbvox_vmesh_pre_vheight[0][i]
+                     + stbvox_geometry_vheight[geo][vert];
+      }
+
+      basevert = stbvox_vertex_encode(pos.x, pos.y, pos.z << STBVOX_CONFIG_PRECISION_Z, 0,0);
+      if (mm->input.selector) {
+         mesh = mm->input.selector[v_off];
+      } else if (mm->input.block_selector)
+         mesh = mm->input.block_selector[bt];
+
+
+      // check if we're going off the end
+      if (mm->output_cur[mesh][0] + mm->output_size[mesh][0]*6 > mm->output_end[mesh][0]) {
+         mm->full = 1;
+         return;
+      }
+
+      if (geo >= STBVOX_GEOM_floor_slope_north_is_top) {
+         if (visible_faces & (1 << STBVOX_FACE_up)) {
+            int normal = geo == STBVOX_GEOM_floor_slope_north_is_top ? stbvox_floor_slope_for_rot[simple_rot] : STBVOX_FACE_up;
+            rotate.facerot = simple_rot;
+            stbvox_make_mesh_for_face(mm, rotate, STBVOX_FACE_up  , v_off, pos, basevert, vmesh[STBVOX_FACE_up], mesh, normal);
+         }
+         if (visible_faces & (1 << STBVOX_FACE_down)) {
+            int normal = geo == STBVOX_GEOM_ceil_slope_north_is_bottom ? stbvox_ceil_slope_for_rot[simple_rot] : STBVOX_FACE_down;
+            rotate.facerot = (-rotate.facerot) & 3;
+            stbvox_make_mesh_for_face(mm, rotate, STBVOX_FACE_down, v_off, pos, basevert, vmesh[STBVOX_FACE_down], mesh, normal);
+         }
+      } else {
+         if (visible_faces & (1 << STBVOX_FACE_up)) {
+            rotate.facerot = simple_rot;
+            stbvox_make_mesh_for_face(mm, rotate, STBVOX_FACE_up  , v_off, pos, basevert, vmesh[STBVOX_FACE_up], mesh, STBVOX_FACE_up);
+         }
+         if (visible_faces & (1 << STBVOX_FACE_down)) {
+            rotate.facerot = (-rotate.facerot) & 3;
+            stbvox_make_mesh_for_face(mm, rotate, STBVOX_FACE_down, v_off, pos, basevert, vmesh[STBVOX_FACE_down], mesh, STBVOX_FACE_down);
+         }
+      }
+
+      if (mm->input.rotate) {
+         unsigned char val = mm->input.rotate[v_off];
+         rotate.block   = (val >> 0) & 3;
+         rotate.overlay = (val >> 2) & 3;
+         //rotate.tex2    = (val >> 4) & 3;
+         rotate.ecolor  = (val >> 6) & 3;
+      } else {
+         rotate.block = rotate.overlay = rotate.ecolor = simple_rot;
+      }
+
+      rotate.facerot = 0;
+
+      if (visible_faces & (1 << STBVOX_FACE_north))
+         stbvox_make_mesh_for_face(mm, rotate, STBVOX_FACE_north, v_off, pos, basevert, vmesh[STBVOX_FACE_north], mesh, STBVOX_FACE_north);
+      if (visible_faces & (1 << STBVOX_FACE_south))
+         stbvox_make_mesh_for_face(mm, rotate, STBVOX_FACE_south, v_off, pos, basevert, vmesh[STBVOX_FACE_south], mesh, STBVOX_FACE_south);
+      if (visible_faces & (1 << STBVOX_FACE_east))
+         stbvox_make_mesh_for_face(mm, rotate, STBVOX_FACE_east , v_off, pos, basevert, vmesh[STBVOX_FACE_east ], mesh, STBVOX_FACE_east);
+      if (visible_faces & (1 << STBVOX_FACE_west))
+         stbvox_make_mesh_for_face(mm, rotate, STBVOX_FACE_west , v_off, pos, basevert, vmesh[STBVOX_FACE_west ], mesh, STBVOX_FACE_west);
+   }
+   if (geo >= STBVOX_GEOM_floor_vheight_03) {
+      // this case can also be generated with regular block gen with special vmesh,
+      // except:
+      //     if we want to generate middle diagonal for 'weird' blocks
+      //     it's more complicated to detect neighbor matchups
+      stbvox_mesh_vertex vmesh[6][4];
+      stbvox_mesh_vertex cube[8];
+      stbvox_mesh_vertex basevert;
+      stbvox_rotate rotate = { 0,0,0,0 };
+      unsigned char simple_rot = rot;
+      unsigned char ht[4];
+      int extreme;
+
+      // extract the heights
+      #ifdef STBVOX_CONFIG_VHEIGHT_IN_LIGHTING
+      ht[0] = mm->input.lighting[v_off              ] & 3;
+      ht[1] = mm->input.lighting[v_off+ew_off       ] & 3;
+      ht[2] = mm->input.lighting[v_off       +ns_off] & 3;
+      ht[3] = mm->input.lighting[v_off+ew_off+ns_off] & 3;
+      #else
+      if (mm->input.vheight) {
+         unsigned char v =  mm->input.vheight[v_off];
+         ht[0] = (v >> 0) & 3;
+         ht[1] = (v >> 2) & 3;
+         ht[2] = (v >> 4) & 3;
+         ht[3] = (v >> 6) & 3;
+      } else if (mm->input.block_vheight) {
+         unsigned char v = mm->input.block_vheight[bt];
+         unsigned char raw[4];
+         int i;
+
+         raw[0] = (v >> 0) & 3;
+         raw[1] = (v >> 2) & 3;
+         raw[2] = (v >> 4) & 3;
+         raw[3] = (v >> 6) & 3;
+
+         for (i=0; i < 4; ++i)
+            ht[i] = raw[stbvox_rotate_vertex[i][rot]];
+      } else if (mm->input.packed_compact) {
+         ht[0] = (mm->input.packed_compact[v_off              ] >> 2) & 3;
+         ht[1] = (mm->input.packed_compact[v_off+ew_off       ] >> 2) & 3;
+         ht[2] = (mm->input.packed_compact[v_off       +ns_off] >> 2) & 3;
+         ht[3] = (mm->input.packed_compact[v_off+ew_off+ns_off] >> 2) & 3;
+      } else if (mm->input.geometry) {
+         ht[0] = mm->input.geometry[v_off              ] >> 6;
+         ht[1] = mm->input.geometry[v_off+ew_off       ] >> 6;
+         ht[2] = mm->input.geometry[v_off       +ns_off] >> 6;
+         ht[3] = mm->input.geometry[v_off+ew_off+ns_off] >> 6;
+      } else {
+         assert(0);
+      }
+      #endif
+
+      // flag whether any sides go off the top of the block, which means
+      // our visible_faces test was wrong
+      extreme = (ht[0] == 3 || ht[1] == 3 || ht[2] == 3 || ht[3] == 3);
+
+      if (geo >= STBVOX_GEOM_ceil_vheight_03) {
+         cube[0] = stbvox_vertex_encode(0,0,ht[0],0,0);
+         cube[1] = stbvox_vertex_encode(0,0,ht[1],0,0);
+         cube[2] = stbvox_vertex_encode(0,0,ht[2],0,0);
+         cube[3] = stbvox_vertex_encode(0,0,ht[3],0,0);
+         cube[4] = stbvox_vertex_encode(0,0,2,0,0);
+         cube[5] = stbvox_vertex_encode(0,0,2,0,0);
+         cube[6] = stbvox_vertex_encode(0,0,2,0,0);
+         cube[7] = stbvox_vertex_encode(0,0,2,0,0);
+      } else {
+         cube[0] = stbvox_vertex_encode(0,0,0,0,0);
+         cube[1] = stbvox_vertex_encode(0,0,0,0,0);
+         cube[2] = stbvox_vertex_encode(0,0,0,0,0);
+         cube[3] = stbvox_vertex_encode(0,0,0,0,0);
+         cube[4] = stbvox_vertex_encode(0,0,ht[0],0,0);
+         cube[5] = stbvox_vertex_encode(0,0,ht[1],0,0);
+         cube[6] = stbvox_vertex_encode(0,0,ht[2],0,0);
+         cube[7] = stbvox_vertex_encode(0,0,ht[3],0,0);
+      }
+      if (!mm->input.vheight && mm->input.block_vheight) {
+         // @TODO: support block vheight here, I've forgotten what needs to be done specially
+      }
+
+      // build vertex mesh
+      {
+         int i;
+         for (i=0; i < 6*4; ++i) {
+            int vert = stbvox_vertex_selector[0][i];
+            vmesh[0][i] = stbvox_vmesh_pre_vheight[0][i]
+                        + cube[vert];
+         }
+      }
+
+      basevert = stbvox_vertex_encode(pos.x, pos.y, pos.z << STBVOX_CONFIG_PRECISION_Z, 0,0);
+      // check if we're going off the end
+      if (mm->output_cur[mesh][0] + mm->output_size[mesh][0]*6 > mm->output_end[mesh][0]) {
+         mm->full = 1;
+         return;
+      }
+
+      // @TODO generate split faces
+      if (visible_faces & (1 << STBVOX_FACE_up)) {
+         if (geo >= STBVOX_GEOM_ceil_vheight_03)
+            // flat
+            stbvox_make_mesh_for_face(mm, rotate, STBVOX_FACE_up  , v_off, pos, basevert, vmesh[STBVOX_FACE_up], mesh, STBVOX_FACE_up);
+         else {
+         #ifndef STBVOX_CONFIG_OPTIMIZED_VHEIGHT
+            // check if it's non-planar
+            if (cube[5] + cube[6] != cube[4] + cube[7]) {
+               // not planar, split along diagonal and make degenerate quads
+               if (geo == STBVOX_GEOM_floor_vheight_03)
+                  stbvox_make_03_split_mesh_for_face(mm, rotate, STBVOX_FACE_up, v_off, pos, basevert, vmesh[STBVOX_FACE_up], mesh, ht);
+               else
+                  stbvox_make_12_split_mesh_for_face(mm, rotate, STBVOX_FACE_up, v_off, pos, basevert, vmesh[STBVOX_FACE_up], mesh, ht);
+            } else
+               stbvox_make_mesh_for_face(mm, rotate, STBVOX_FACE_up  , v_off, pos, basevert, vmesh[STBVOX_FACE_up], mesh, stbvox_planar_face_up_normal[ht[2]][ht[1]][ht[0]]);
+         #else
+            stbvox_make_mesh_for_face(mm, rotate, STBVOX_FACE_up  , v_off, pos, basevert, vmesh[STBVOX_FACE_up], mesh, stbvox_optimized_face_up_normal[ht[3]][ht[2]][ht[1]][ht[0]]);
+         #endif
+         }
+      }
+      if (visible_faces & (1 << STBVOX_FACE_down)) {
+         if (geo < STBVOX_GEOM_ceil_vheight_03)
+            // flat
+            stbvox_make_mesh_for_face(mm, rotate, STBVOX_FACE_down, v_off, pos, basevert, vmesh[STBVOX_FACE_down], mesh, STBVOX_FACE_down);
+         else {
+         #ifndef STBVOX_CONFIG_OPTIMIZED_VHEIGHT
+            // check if it's non-planar
+            if (cube[1] + cube[2] != cube[0] + cube[3]) {
+               // not planar, split along diagonal and make degenerate quads
+               if (geo == STBVOX_GEOM_ceil_vheight_03)
+                  stbvox_make_03_split_mesh_for_face(mm, rotate, STBVOX_FACE_down, v_off, pos, basevert, vmesh[STBVOX_FACE_down], mesh, ht);
+               else
+                  stbvox_make_12_split_mesh_for_face(mm, rotate, STBVOX_FACE_down, v_off, pos, basevert, vmesh[STBVOX_FACE_down], mesh, ht);
+            } else
+               stbvox_make_mesh_for_face(mm, rotate, STBVOX_FACE_down, v_off, pos, basevert, vmesh[STBVOX_FACE_down], mesh, stbvox_reverse_face[stbvox_planar_face_up_normal[ht[2]][ht[1]][ht[0]]]);
+         #else
+            stbvox_make_mesh_for_face(mm, rotate, STBVOX_FACE_down, v_off, pos, basevert, vmesh[STBVOX_FACE_down], mesh, stbvox_reverse_face[stbvox_optimized_face_up_normal[ht[3]][ht[2]][ht[1]][ht[0]]]);
+         #endif
+         }
+      }
+
+      if (mm->input.rotate) {
+         unsigned char val = mm->input.rotate[v_off];
+         rotate.block   = (val >> 0) & 3;
+         rotate.overlay = (val >> 2) & 3;
+         //rotate.tex2    = (val >> 4) & 3;
+         rotate.ecolor  = (val >> 6) & 3;
+      } else if (mm->input.selector) {
+         rotate.block = rotate.overlay = rotate.ecolor = simple_rot;
+      }
+
+      if ((visible_faces & (1 << STBVOX_FACE_north)) || (extreme && (ht[2] == 3 || ht[3] == 3)))
+         stbvox_make_mesh_for_face(mm, rotate, STBVOX_FACE_north, v_off, pos, basevert, vmesh[STBVOX_FACE_north], mesh, STBVOX_FACE_north);
+      if ((visible_faces & (1 << STBVOX_FACE_south)) || (extreme && (ht[0] == 3 || ht[1] == 3)))
+         stbvox_make_mesh_for_face(mm, rotate, STBVOX_FACE_south, v_off, pos, basevert, vmesh[STBVOX_FACE_south], mesh, STBVOX_FACE_south);
+      if ((visible_faces & (1 << STBVOX_FACE_east)) || (extreme && (ht[1] == 3 || ht[3] == 3)))
+         stbvox_make_mesh_for_face(mm, rotate, STBVOX_FACE_east , v_off, pos, basevert, vmesh[STBVOX_FACE_east ], mesh, STBVOX_FACE_east);
+      if ((visible_faces & (1 << STBVOX_FACE_west)) || (extreme && (ht[0] == 3 || ht[2] == 3)))
+         stbvox_make_mesh_for_face(mm, rotate, STBVOX_FACE_west , v_off, pos, basevert, vmesh[STBVOX_FACE_west ], mesh, STBVOX_FACE_west);
+   }
+
+   if (geo == STBVOX_GEOM_crossed_pair) {
+      // this can be generated with a special vmesh
+      stbvox_mesh_vertex basevert = stbvox_vertex_encode(pos.x, pos.y, pos.z << STBVOX_CONFIG_PRECISION_Z , 0,0);
+      unsigned char simple_rot=0;
+      stbvox_rotate rot = { 0,0,0,0 };
+      unsigned char mesh = mm->default_mesh;
+      if (mm->input.selector) {
+         mesh = mm->input.selector[v_off];
+         simple_rot = mesh >> 4;
+         mesh &= 15;
+      }
+      if (mm->input.block_selector) {
+         mesh = mm->input.block_selector[bt];
+      }
+
+      // check if we're going off the end
+      if (mm->output_cur[mesh][0] + mm->output_size[mesh][0]*4 > mm->output_end[mesh][0]) {
+         mm->full = 1;
+         return;
+      }
+
+      if (mm->input.rotate) {
+         unsigned char val = mm->input.rotate[v_off];
+         rot.block   = (val >> 0) & 3;
+         rot.overlay = (val >> 2) & 3;
+         //rot.tex2    = (val >> 4) & 3;
+         rot.ecolor  = (val >> 6) & 3;
+      } else if (mm->input.selector) {
+         rot.block = rot.overlay = rot.ecolor = simple_rot;
+      }
+      rot.facerot = 0;
+
+      stbvox_make_mesh_for_face(mm, rot, STBVOX_FACE_north, v_off, pos, basevert, stbvox_vmesh_crossed_pair[STBVOX_FACE_north], mesh, STBVF_ne_u_cross);
+      stbvox_make_mesh_for_face(mm, rot, STBVOX_FACE_south, v_off, pos, basevert, stbvox_vmesh_crossed_pair[STBVOX_FACE_south], mesh, STBVF_sw_u_cross);
+      stbvox_make_mesh_for_face(mm, rot, STBVOX_FACE_east , v_off, pos, basevert, stbvox_vmesh_crossed_pair[STBVOX_FACE_east ], mesh, STBVF_se_u_cross);
+      stbvox_make_mesh_for_face(mm, rot, STBVOX_FACE_west , v_off, pos, basevert, stbvox_vmesh_crossed_pair[STBVOX_FACE_west ], mesh, STBVF_nw_u_cross);
+   }
+
+
+   // @TODO
+   // STBVOX_GEOM_floor_slope_north_is_top_as_wall,
+   // STBVOX_GEOM_ceil_slope_north_is_bottom_as_wall,
+}
+
+static void stbvox_make_mesh_for_column(stbvox_mesh_maker *mm, int x, int y, int z0)
+{
+   stbvox_pos pos;
+   int v_off = x * mm->x_stride_in_bytes + y * mm->y_stride_in_bytes;
+   int ns_off = mm->y_stride_in_bytes;
+   int ew_off = mm->x_stride_in_bytes;
+   pos.x = x;
+   pos.y = y;
+   pos.z = 0;
+   if (mm->input.geometry) {
+      unsigned char *bt  = mm->input.blocktype + v_off;
+      unsigned char *geo = mm->input.geometry + v_off;
+      int z;
+      for (z=z0; z < mm->z1; ++z) {
+         if (bt[z] && ( !bt[z+ns_off] || !STBVOX_GET_GEO(geo[z+ns_off]) || !bt[z-ns_off] || !STBVOX_GET_GEO(geo[z-ns_off])
+                      || !bt[z+ew_off] || !STBVOX_GET_GEO(geo[z+ew_off]) || !bt[z-ew_off] || !STBVOX_GET_GEO(geo[z-ew_off])
+                      || !bt[z-1] || !STBVOX_GET_GEO(geo[z-1]) || !bt[z+1] || !STBVOX_GET_GEO(geo[z+1])))
+         {  // TODO check up and down
+            pos.z = z;
+            stbvox_make_mesh_for_block_with_geo(mm, pos, v_off+z);
+            if (mm->full) {
+               mm->cur_z = z;
+               return;
+            }
+         }
+      }
+   } else if (mm->input.block_geometry) {
+      int z;
+      unsigned char *bt  = mm->input.blocktype + v_off;
+      unsigned char *geo = mm->input.block_geometry;
+      for (z=z0; z < mm->z1; ++z) {
+         if (bt[z] && (    geo[bt[z+ns_off]] != STBVOX_GEOM_solid
+                        || geo[bt[z-ns_off]] != STBVOX_GEOM_solid
+                        || geo[bt[z+ew_off]] != STBVOX_GEOM_solid
+                        || geo[bt[z-ew_off]] != STBVOX_GEOM_solid
+                        || geo[bt[z-1]] != STBVOX_GEOM_solid
+                        || geo[bt[z+1]] != STBVOX_GEOM_solid))
+         {
+            pos.z = z;
+            stbvox_make_mesh_for_block_with_geo(mm, pos, v_off+z);
+            if (mm->full) {
+               mm->cur_z = z;
+               return;
+            }
+         }
+      }
+   } else {
+      unsigned char *bt = mm->input.blocktype + v_off;
+      int z;
+      #if STBVOX_CONFIG_PRECISION_Z == 1
+      stbvox_mesh_vertex *vmesh = stbvox_vmesh_delta_half_z[0];
+      #else
+      stbvox_mesh_vertex *vmesh = stbvox_vmesh_delta_normal[0];
+      #endif
+      for (z=z0; z < mm->z1; ++z) {
+         // if it's solid and at least one neighbor isn't solid
+         if (bt[z] && (!bt[z+ns_off] || !bt[z-ns_off] || !bt[z+ew_off] || !bt[z-ew_off] || !bt[z-1] || !bt[z+1])) {
+            pos.z = z;
+            stbvox_make_mesh_for_block(mm, pos, v_off+z, vmesh);
+            if (mm->full) {
+               mm->cur_z = z;
+               return;
+            }
+         }
+      }
+   }
+}
+
+static void stbvox_bring_up_to_date(stbvox_mesh_maker *mm)
+{
+   if (mm->config_dirty) {
+      int i;
+      #ifdef STBVOX_ICONFIG_FACE_ATTRIBUTE
+         mm->num_mesh_slots = 1;
+         for (i=0; i < STBVOX_MAX_MESHES; ++i) {
+            mm->output_size[i][0] = 32;
+            mm->output_step[i][0] = 8;
+         }
+      #else
+         mm->num_mesh_slots = 2;
+         for (i=0; i < STBVOX_MAX_MESHES; ++i) {
+            mm->output_size[i][0] = 16;
+            mm->output_step[i][0] = 4;
+            mm->output_size[i][1] = 4;
+            mm->output_step[i][1] = 4;
+         }
+      #endif
+
+      mm->config_dirty = 0;
+   }
+}
+
+int stbvox_make_mesh(stbvox_mesh_maker *mm)
+{
+   int x,y;
+   stbvox_bring_up_to_date(mm);
+   mm->full = 0;
+   if (mm->cur_x > mm->x0 || mm->cur_y > mm->y0 || mm->cur_z > mm->z0) {
+      stbvox_make_mesh_for_column(mm, mm->cur_x, mm->cur_y, mm->cur_z);
+      if (mm->full)
+         return 0;
+      ++mm->cur_y;
+      while (mm->cur_y < mm->y1 && !mm->full) {
+         stbvox_make_mesh_for_column(mm, mm->cur_x, mm->cur_y, mm->z0);
+         if (mm->full)
+            return 0;
+         ++mm->cur_y;
+      }
+      ++mm->cur_x;
+   }
+   for (x=mm->cur_x; x < mm->x1; ++x) {
+      for (y=mm->y0; y < mm->y1; ++y) {
+         stbvox_make_mesh_for_column(mm, x, y, mm->z0);
+         if (mm->full) {
+            mm->cur_x = x;
+            mm->cur_y = y;
+            return 0;
+         }
+      }
+   }
+   return 1;
+}
+
+void stbvox_init_mesh_maker(stbvox_mesh_maker *mm)
+{
+   memset(mm, 0, sizeof(*mm));
+   stbvox_build_default_palette();
+
+   mm->config_dirty = 1;
+   mm->default_mesh = 0;
+}
+
+int stbvox_get_buffer_count(stbvox_mesh_maker *mm)
+{
+   stbvox_bring_up_to_date(mm);
+   return mm->num_mesh_slots;
+}
+
+int stbvox_get_buffer_size_per_quad(stbvox_mesh_maker *mm, int n)
+{
+   return mm->output_size[0][n];
+}
+
+void stbvox_reset_buffers(stbvox_mesh_maker *mm)
+{
+   int i;
+   for (i=0; i < STBVOX_MAX_MESHES*STBVOX_MAX_MESH_SLOTS; ++i) {
+      mm->output_cur[0][i] = 0;
+      mm->output_buffer[0][i] = 0;
+   }
+}
+
+void stbvox_set_buffer(stbvox_mesh_maker *mm, int mesh, int slot, void *buffer, size_t len)
+{
+   int i;
+   stbvox_bring_up_to_date(mm);
+   mm->output_buffer[mesh][slot] = (char *) buffer;
+   mm->output_cur   [mesh][slot] = (char *) buffer;
+   mm->output_len   [mesh][slot] = (int) len;
+   mm->output_end   [mesh][slot] = (char *) buffer + len;
+   for (i=0; i < STBVOX_MAX_MESH_SLOTS; ++i) {
+      if (mm->output_buffer[mesh][i]) {
+         assert(mm->output_len[mesh][i] / mm->output_size[mesh][i] == mm->output_len[mesh][slot] / mm->output_size[mesh][slot]);
+      }
+   }
+}
+
+void stbvox_set_default_mesh(stbvox_mesh_maker *mm, int mesh)
+{
+   mm->default_mesh = mesh;
+}
+
+int stbvox_get_quad_count(stbvox_mesh_maker *mm, int mesh)
+{
+   return (int) ((mm->output_cur[mesh][0] - mm->output_buffer[mesh][0]) / mm->output_size[mesh][0]);
+}
+
+stbvox_input_description *stbvox_get_input_description(stbvox_mesh_maker *mm)
+{
+   return &mm->input;
+}
+
+void stbvox_set_input_range(stbvox_mesh_maker *mm, int x0, int y0, int z0, int x1, int y1, int z1)
+{
+   mm->x0 = x0;
+   mm->y0 = y0;
+   mm->z0 = z0;
+
+   mm->x1 = x1;
+   mm->y1 = y1;
+   mm->z1 = z1;
+
+   mm->cur_x = x0;
+   mm->cur_y = y0;
+   mm->cur_z = z0;
+
+   // @TODO validate that this range is representable in this mode
+}
+
+void stbvox_get_transform(stbvox_mesh_maker *mm, float transform[3][3])
+{
+   // scale
+   transform[0][0] = 1.0;
+   transform[0][1] = 1.0;
+   #if STBVOX_CONFIG_PRECISION_Z==1
+   transform[0][2] = 0.5f;
+   #else
+   transform[0][2] = 1.0f;
+   #endif
+   // translation
+   transform[1][0] = (float) (mm->pos_x);
+   transform[1][1] = (float) (mm->pos_y);
+   transform[1][2] = (float) (mm->pos_z);
+   // texture coordinate projection translation
+   transform[2][0] = (float) (mm->pos_x & 255); // @TODO depends on max texture scale
+   transform[2][1] = (float) (mm->pos_y & 255);
+   transform[2][2] = (float) (mm->pos_z & 255);
+}
+
+void stbvox_get_bounds(stbvox_mesh_maker *mm, float bounds[2][3])
+{
+   bounds[0][0] = (float) (mm->pos_x + mm->x0);
+   bounds[0][1] = (float) (mm->pos_y + mm->y0);
+   bounds[0][2] = (float) (mm->pos_z + mm->z0);
+   bounds[1][0] = (float) (mm->pos_x + mm->x1);
+   bounds[1][1] = (float) (mm->pos_y + mm->y1);
+   bounds[1][2] = (float) (mm->pos_z + mm->z1);
+}
+
+void stbvox_set_mesh_coordinates(stbvox_mesh_maker *mm, int x, int y, int z)
+{
+   mm->pos_x = x;
+   mm->pos_y = y;
+   mm->pos_z = z;
+}
+
+void stbvox_set_input_stride(stbvox_mesh_maker *mm, int x_stride_in_bytes, int y_stride_in_bytes)
+{
+   int f,v;
+   mm->x_stride_in_bytes = x_stride_in_bytes;
+   mm->y_stride_in_bytes = y_stride_in_bytes;
+   for (f=0; f < 6; ++f) {
+      for (v=0; v < 4; ++v) {
+         mm->cube_vertex_offset[f][v]   =   stbvox_vertex_vector[f][v][0]    * mm->x_stride_in_bytes
+                                         +  stbvox_vertex_vector[f][v][1]    * mm->y_stride_in_bytes
+                                         +  stbvox_vertex_vector[f][v][2]                           ;
+         mm->vertex_gather_offset[f][v] =  (stbvox_vertex_vector[f][v][0]-1) * mm->x_stride_in_bytes
+                                         + (stbvox_vertex_vector[f][v][1]-1) * mm->y_stride_in_bytes
+                                         + (stbvox_vertex_vector[f][v][2]-1)                        ;
+      }
+   }
+}
+
+/////////////////////////////////////////////////////////////////////////////
+//
+//    offline computation of tables
+//
+
+#if 0
+// compute optimized vheight table
+static char *normal_names[32] =
+{
+   0,0,0,0,"u   ",0, "eu  ",0,
+   0,0,0,0,"ne_u",0, "nu  ",0,
+   0,0,0,0,"nw_u",0, "wu  ",0,
+   0,0,0,0,"sw_u",0, "su  ",0,
+};
+
+static char *find_best_normal(float x, float y, float z)
+{
+   int best_slot = 4;
+   float best_dot = 0;
+   int i;
+   for (i=0; i < 32; ++i) {
+      if (normal_names[i]) {
+         float dot = x * stbvox_default_normals[i][0] + y * stbvox_default_normals[i][1] + z * stbvox_default_normals[i][2];
+         if (dot > best_dot) {
+            best_dot = dot;
+            best_slot = i;
+         }
+      }
+   }
+   return normal_names[best_slot];
+}
+
+int main(int argc, char **argv)
+{
+   int sw,se,nw,ne;
+   for (ne=0; ne < 4; ++ne) {
+      for (nw=0; nw < 4; ++nw) {
+         for (se=0; se < 4; ++se) {
+            printf("        { ");
+            for (sw=0; sw < 4; ++sw) {
+               float x = (float) (nw + sw - ne - se);
+               float y = (float) (sw + se - nw - ne);
+               float z = 2;
+               printf("STBVF_%s, ", find_best_normal(x,y,z));
+            }
+            printf("},\n");
+         }
+      }
+   }
+   return 0;
+}
+#endif
+
+// @TODO
+//
+//   - test API for texture rotation on side faces
+//   - API for texture rotation on top & bottom
+//   - better culling of vheight faces with vheight neighbors
+//   - better culling of non-vheight faces with vheight neighbors
+//   - gather vertex lighting from slopes correctly
+//   - better support texture edge_clamp: currently if you fall
+//     exactly on 1.0 you get wrapped incorrectly; this is rare, but
+//     can avoid: compute texcoords in vertex shader, offset towards
+//     center before modding, need 2 bits per vertex to know offset direction)
+//   - other mesh modes (10,6,4-byte quads)
+//
+//
+// With TexBuffer for the fixed vertex data, we can actually do
+// minecrafty non-blocks like stairs -- we still probably only
+// want 256 or so, so we can't do the equivalent of all the vheight
+// combos, but that's ok. The 256 includes baked rotations, but only
+// some of them need it, and lots of block types share some faces.
+//
+// mode 5 (6 bytes):   mode 6 (6 bytes)
+//   x:7                x:6
+//   y:7                y:6
+//   z:6                z:6
+//   tex1:8             tex1:8
+//   tex2:8             tex2:7
+//   color:8            color:8
+//   face:4             face:7
+//
+//
+//  side faces (all x4)        top&bottom faces (2x)    internal faces (1x)
+//     1  regular                1 regular
+//     2  slabs                                             2
+//     8  stairs                 4 stairs                  16
+//     4  diag side                                         8
+//     4  upper diag side                                   8
+//     4  lower diag side                                   8
+//                                                          4 crossed pairs
+//
+//    23*4                   +   5*4                    +  46
+//  == 92 + 20 + 46 = 158
+//
+//   Must drop 30 of them to fit in 7 bits:
+//       ceiling half diagonals: 16+8 = 24
+//   Need to get rid of 6 more.
+//       ceiling diagonals: 8+4 = 12
+//   This brings it to 122, so can add a crossed-pair variant.
+//       (diagonal and non-diagonal, or randomly offset)
+//   Or carpet, which would be 5 more.
+//
+//
+// Mode 4 (10 bytes):
+//  v:  z:2,light:6
+//  f:  x:6,y:6,z:7, t1:8,t2:8,c:8,f:5
+//
+// Mode ? (10 bytes)
+//  v:  xyz:5 (27 values), light:3
+//  f:  x:7,y:7,z:6, t1:8,t2:8,c:8,f:4
+// (v:  x:2,y:2,z:2,light:2)
+
+#endif // STB_VOXEL_RENDER_IMPLEMENTATION
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/