ref: d28346577753a99119c9a19b7020e7edc91c44be
parent: 278590fcb46b63afcff326da4241786ab9559e86
author: Tor Andersson <tor.andersson@artifex.com>
date: Fri Dec 2 07:16:45 EST 2022
Regenerate Unicode data from latest UnicodeData.txt
--- a/genucd.py
+++ b/genucd.py
@@ -1,10 +1,12 @@
# Create utfdata.h from UnicodeData.txt
+import sys
+
tolower = []
toupper = []
isalpha = []
-for line in open("UnicodeData.txt").readlines():
+for line in open(sys.argv[1]).readlines():
line = line.split(";")
code = int(line[0],16)
# if code > 65535: continue # skip non-BMP codepoints
@@ -71,7 +73,7 @@
print(hex(a)+","+str(n-a)+",")
print("};");
-print("/* This file was automatically created from UnicodeData.txt */")
+print("/* This file was automatically created from " + sys.argv[1] + " */")
dumpalpha()
dumpmap("ucd_tolower", tolower)
dumpmap("ucd_toupper", toupper)
--- a/utfdata.h
+++ b/utfdata.h
@@ -33,8 +33,9 @@
0x800,0x815,
0x840,0x858,
0x860,0x86a,
-0x8a0,0x8b4,
-0x8b6,0x8c7,
+0x870,0x887,
+0x889,0x88e,
+0x8a0,0x8c9,
0x904,0x939,
0x958,0x961,
0x971,0x980,
@@ -89,6 +90,7 @@
0xc92,0xca8,
0xcaa,0xcb3,
0xcb5,0xcb9,
+0xcdd,0xcde,
0xce0,0xce1,
0xcf1,0xcf2,
0xd04,0xd0c,
@@ -144,9 +146,8 @@
0x1681,0x169a,
0x16a0,0x16ea,
0x16f1,0x16f8,
-0x1700,0x170c,
-0x170e,0x1711,
-0x1720,0x1731,
+0x1700,0x1711,
+0x171f,0x1731,
0x1740,0x1751,
0x1760,0x176c,
0x176e,0x1770,
@@ -163,7 +164,7 @@
0x1a00,0x1a16,
0x1a20,0x1a54,
0x1b05,0x1b33,
-0x1b45,0x1b4b,
+0x1b45,0x1b4c,
0x1b83,0x1ba0,
0x1bae,0x1baf,
0x1bba,0x1be5,
@@ -200,9 +201,7 @@
0x213c,0x213f,
0x2145,0x2149,
0x2183,0x2184,
-0x2c00,0x2c2e,
-0x2c30,0x2c5e,
-0x2c60,0x2ce4,
+0x2c00,0x2ce4,
0x2ceb,0x2cee,
0x2cf2,0x2cf3,
0x2d00,0x2d25,
@@ -227,7 +226,7 @@
0x3131,0x318e,
0x31a0,0x31bf,
0x31f0,0x31ff,
-0xa000,0xa48c,
+0x9fff,0xa48c,
0xa4d0,0xa4fd,
0xa500,0xa60c,
0xa610,0xa61f,
@@ -237,9 +236,10 @@
0xa6a0,0xa6e5,
0xa717,0xa71f,
0xa722,0xa788,
-0xa78b,0xa7bf,
-0xa7c2,0xa7ca,
-0xa7f5,0xa801,
+0xa78b,0xa7ca,
+0xa7d0,0xa7d1,
+0xa7d5,0xa7d9,
+0xa7f2,0xa801,
0xa803,0xa805,
0xa807,0xa80a,
0xa80c,0xa822,
@@ -318,9 +318,20 @@
0x104d8,0x104fb,
0x10500,0x10527,
0x10530,0x10563,
+0x10570,0x1057a,
+0x1057c,0x1058a,
+0x1058c,0x10592,
+0x10594,0x10595,
+0x10597,0x105a1,
+0x105a3,0x105b1,
+0x105b3,0x105b9,
+0x105bb,0x105bc,
0x10600,0x10736,
0x10740,0x10755,
0x10760,0x10767,
+0x10780,0x10785,
+0x10787,0x107b0,
+0x107b2,0x107ba,
0x10800,0x10805,
0x1080a,0x10835,
0x10837,0x10838,
@@ -352,9 +363,11 @@
0x10eb0,0x10eb1,
0x10f00,0x10f1c,
0x10f30,0x10f45,
+0x10f70,0x10f81,
0x10fb0,0x10fc4,
0x10fe0,0x10ff6,
0x11003,0x11037,
+0x11071,0x11072,
0x11083,0x110af,
0x110d0,0x110e8,
0x11103,0x11126,
@@ -363,6 +376,7 @@
0x111c1,0x111c4,
0x11200,0x11211,
0x11213,0x1122b,
+0x1123f,0x11240,
0x11280,0x11286,
0x1128a,0x1128d,
0x1128f,0x1129d,
@@ -385,6 +399,7 @@
0x11600,0x1162f,
0x11680,0x116aa,
0x11700,0x1171a,
+0x11740,0x11746,
0x11800,0x1182b,
0x118a0,0x118df,
0x118ff,0x11906,
@@ -395,7 +410,7 @@
0x119aa,0x119d0,
0x11a0b,0x11a32,
0x11a5c,0x11a89,
-0x11ac0,0x11af8,
+0x11ab0,0x11af8,
0x11c00,0x11c08,
0x11c0a,0x11c2e,
0x11c72,0x11c8f,
@@ -406,12 +421,17 @@
0x11d67,0x11d68,
0x11d6a,0x11d89,
0x11ee0,0x11ef2,
+0x11f04,0x11f10,
+0x11f12,0x11f33,
0x12000,0x12399,
0x12480,0x12543,
-0x13000,0x1342e,
+0x12f90,0x12ff0,
+0x13000,0x1342f,
+0x13441,0x13446,
0x14400,0x14646,
0x16800,0x16a38,
0x16a40,0x16a5e,
+0x16a70,0x16abe,
0x16ad0,0x16aed,
0x16b00,0x16b2f,
0x16b40,0x16b43,
@@ -422,7 +442,10 @@
0x16f93,0x16f9f,
0x16fe0,0x16fe1,
0x18800,0x18cd5,
-0x1b000,0x1b11e,
+0x1aff0,0x1aff3,
+0x1aff5,0x1affb,
+0x1affd,0x1affe,
+0x1b000,0x1b122,
0x1b150,0x1b152,
0x1b164,0x1b167,
0x1b170,0x1b2fb,
@@ -457,9 +480,18 @@
0x1d78a,0x1d7a8,
0x1d7aa,0x1d7c2,
0x1d7c4,0x1d7cb,
+0x1df00,0x1df1e,
+0x1df25,0x1df2a,
+0x1e030,0x1e06d,
0x1e100,0x1e12c,
0x1e137,0x1e13d,
+0x1e290,0x1e2ad,
0x1e2c0,0x1e2eb,
+0x1e4d0,0x1e4eb,
+0x1e7e0,0x1e7e6,
+0x1e7e8,0x1e7eb,
+0x1e7ed,0x1e7ee,
+0x1e7f0,0x1e7fe,
0x1e800,0x1e8c4,
0x1e900,0x1e943,
0x1ee00,0x1ee03,
@@ -516,9 +548,9 @@
0xb9c,
0xbd0,
0xc3d,
+0xc5d,
0xc80,
0xcbd,
-0xcde,
0xd3d,
0xd4e,
0xdbd,
@@ -559,7 +591,7 @@
0x3400,
0x4dbf,
0x4e00,
-0x9ffc,
+0xa7d3,
0xa8fb,
0xa9cf,
0xaa7a,
@@ -574,6 +606,7 @@
0x1083c,
0x10a00,
0x10f27,
+0x11075,
0x11144,
0x11147,
0x11176,
@@ -597,6 +630,7 @@
0x11c40,
0x11d46,
0x11d98,
+0x11f02,
0x11fb0,
0x16f50,
0x16fe3,
@@ -604,6 +638,8 @@
0x187f7,
0x18d00,
0x18d08,
+0x1b132,
+0x1b155,
0x1d4a2,
0x1d4bb,
0x1d546,
@@ -626,9 +662,9 @@
0x1ee64,
0x1ee7e,
0x20000,
-0x2a6dd,
+0x2a6df,
0x2a700,
-0x2b734,
+0x2b739,
0x2b740,
0x2b81d,
0x2b820,
@@ -637,6 +673,8 @@
0x2ebe0,
0x30000,
0x3134a,
+0x31350,
+0x323af,
};
static const Rune ucd_tolower2[] = {
@@ -678,11 +716,15 @@
0x1ffa,0x1ffb,-126,
0x2160,0x216f,16,
0x24b6,0x24cf,26,
-0x2c00,0x2c2e,48,
+0x2c00,0x2c2f,48,
0x2c7e,0x2c7f,-10815,
0xff21,0xff3a,32,
0x10400,0x10427,40,
0x104b0,0x104d3,40,
+0x10570,0x1057a,39,
+0x1057c,0x1058a,39,
+0x1058c,0x10592,39,
+0x10594,0x10595,39,
0x10c80,0x10cb2,64,
0x118a0,0x118bf,32,
0x16e40,0x16e5f,32,
@@ -1296,6 +1338,7 @@
0xa7ba,1,
0xa7bc,1,
0xa7be,1,
+0xa7c0,1,
0xa7c2,1,
0xa7c4,-48,
0xa7c5,-42307,
@@ -1302,6 +1345,9 @@
0xa7c6,-35384,
0xa7c7,1,
0xa7c9,1,
+0xa7d0,1,
+0xa7d6,1,
+0xa7d8,1,
0xa7f5,1,
};
@@ -1344,12 +1390,16 @@
0x1fe0,0x1fe1,8,
0x2170,0x217f,-16,
0x24d0,0x24e9,-26,
-0x2c30,0x2c5e,-48,
+0x2c30,0x2c5f,-48,
0x2d00,0x2d25,-7264,
0xab70,0xabbf,-38864,
0xff41,0xff5a,-32,
0x10428,0x1044f,-40,
0x104d8,0x104fb,-40,
+0x10597,0x105a1,-39,
+0x105a3,0x105b1,-39,
+0x105b3,0x105b9,-39,
+0x105bb,0x105bc,-39,
0x10cc0,0x10cf2,-64,
0x118c0,0x118df,-32,
0x16e60,0x16e7f,-32,
@@ -1980,9 +2030,13 @@
0xa7bb,-1,
0xa7bd,-1,
0xa7bf,-1,
+0xa7c1,-1,
0xa7c3,-1,
0xa7c8,-1,
0xa7ca,-1,
+0xa7d1,-1,
+0xa7d7,-1,
+0xa7d9,-1,
0xa7f6,-1,
0xab53,-928,
};