ref: 8f4bfe49e034694ac1d79f89130ed0566845da56
dir: /utf16.c/
/* Horror stories: http://en.wikipedia.org/wiki/UTF-16 */
#include "tagspriv.h"
/*
 * Read one 16-bit code unit from s[0..1]. Byte order is taken from the
 * local variable `be` of the enclosing function (true = big-endian).
 */
#define rchr(s) (be ? ((s)[0]<<8 | (s)[1]) : ((s)[1]<<8 | (s)[0]))

/* UTF-8 lead-byte prefix, indexed by the sequence length in bytes (1..4). */
static const uint8_t mark[] = {0x00, 0x00, 0xc0, 0xe0, 0xf0};

/*
 * Convert UTF-16 text to NUL-terminated UTF-8.
 *
 * o, osz: output buffer and its size in bytes; one byte is always
 *         reserved for the terminating NUL.
 * s, sz:  input bytes and their count. A leading byte order mark
 *         (FE FF or FF FE) selects the byte order and is skipped;
 *         without one the input is read as big-endian. A trailing
 *         odd byte is ignored.
 *
 * Returns the input offset reached, or -1 if the input contains an
 * unpaired surrogate or osz leaves no room for the terminator. On -1
 * the output is not NUL-terminated. When the output fills up the
 * conversion stops early; the returned offset then already includes
 * the character that did not fit.
 */
int
utf16to8(uint8_t *o, int osz, const uint8_t *s, int sz)
{
	uint32_t c, c2;
	int i, wr, j;
	bool be;

	/* without room for the terminator nothing can be written safely */
	if(osz < 1)
		return -1;

	i = 0;
	be = true;
	/* only look for a BOM when two bytes are actually present */
	if(sz >= 2){
		if(s[0] == 0xfe && s[1] == 0xff)
			i += 2;
		else if(s[0] == 0xff && s[1] == 0xfe){
			be = false;
			i += 2;
		}
	}

	while(i < sz-1 && osz > 1){
		c = rchr(&s[i]);
		i += 2;
		if(c >= 0xd800 && c <= 0xdbff){
			/* a high surrogate must be followed by a low surrogate */
			if(i >= sz-1)
				return -1;
			c2 = rchr(&s[i]);
			if(c2 < 0xdc00 || c2 > 0xdfff)
				return -1;
			/*
			 * The 20 payload bits are an offset from U+10000, so the
			 * base has to be added: OR-ing it in loses the carry for
			 * every code point at or above U+20000.
			 */
			c = 0x10000 + ((c - 0xd800)<<10 | (c2 - 0xdc00));
			i += 2;
		}else if(c >= 0xdc00 && c <= 0xdfff)
			return -1;

		if(c < 0x80)
			wr = 1;
		else if(c < 0x800)
			wr = 2;
		else if(c < 0x10000)
			wr = 3;
		else
			wr = 4;

		/* keep one byte spare for the terminating NUL */
		osz -= wr;
		if(osz < 1)
			break;

		/* fill the sequence back to front: continuation bytes, then the lead byte */
		o += wr;
		for(j = wr; j > 1; j--){
			*(--o) = (c & 0x3f) | 0x80;
			c >>= 6;
		}
		*(--o) = (uint8_t)c | mark[wr];
		o += wr;
	}

	*o = 0;
	return i;
}