ref: 74a2d0c72b7f0118d8ceb8446e43b612c331b0d1
parent: bf8afbbf77844ee20f460c7494468048a20f6695
author: Sigrid Solveig Haflínudóttir <sigrid@ftrv.se>
date: Wed Oct 23 08:45:29 EDT 2024
reduce custom unicode logic, reuse the one in Plan 9 instead
--- a/3rd/wcwidth.c
+++ b/3rd/wcwidth.c
@@ -18,8 +18,8 @@
#include "../llt.h"
struct width_interval {- int start;
- int end;
+ int start;
+ int end;
};
// From https://github.com/jquast/wcwidth/blob/master/wcwidth/table_zero.py
@@ -26,349 +26,349 @@
// from https://github.com/jquast/wcwidth/pull/64
// at commit 1b9b6585b0080ea5cb88dc9815796505724793fe (2022-12-16):
static struct width_interval ZERO_WIDTH[] = {- {0x00300, 0x0036f}, // Combining Grave Accent ..Combining Latin Small Le- {0x00483, 0x00489}, // Combining Cyrillic Titlo..Combining Cyrillic Milli- {0x00591, 0x005bd}, // Hebrew Accent Etnahta ..Hebrew Point Meteg- {0x005bf, 0x005bf}, // Hebrew Point Rafe ..Hebrew Point Rafe- {0x005c1, 0x005c2}, // Hebrew Point Shin Dot ..Hebrew Point Sin Dot- {0x005c4, 0x005c5}, // Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot- {0x005c7, 0x005c7}, // Hebrew Point Qamats Qata..Hebrew Point Qamats Qata- {0x00610, 0x0061a}, // Arabic Sign Sallallahou ..Arabic Small Kasra- {0x0064b, 0x0065f}, // Arabic Fathatan ..Arabic Wavy Hamza Below- {0x00670, 0x00670}, // Arabic Letter Superscrip..Arabic Letter Superscrip- {0x006d6, 0x006dc}, // Arabic Small High Ligatu..Arabic Small High Seen- {0x006df, 0x006e4}, // Arabic Small High Rounde..Arabic Small High Madda- {0x006e7, 0x006e8}, // Arabic Small High Yeh ..Arabic Small High Noon- {0x006ea, 0x006ed}, // Arabic Empty Centre Low ..Arabic Small Low Meem- {0x00711, 0x00711}, // Syriac Letter Superscrip..Syriac Letter Superscrip- {0x00730, 0x0074a}, // Syriac Pthaha Above ..Syriac Barrekh- {0x007a6, 0x007b0}, // Thaana Abafili ..Thaana Sukun- {0x007eb, 0x007f3}, // Nko Combining Short High..Nko Combining Double Dot- {0x007fd, 0x007fd}, // Nko Dantayalan ..Nko Dantayalan- {0x00816, 0x00819}, // Samaritan Mark In ..Samaritan Mark Dagesh- {0x0081b, 0x00823}, // Samaritan Mark Epentheti..Samaritan Vowel Sign A- {0x00825, 0x00827}, // Samaritan Vowel Sign Sho..Samaritan Vowel Sign U- {0x00829, 0x0082d}, // Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa- {0x00859, 0x0085b}, // Mandaic Affrication Mark..Mandaic Gemination Mark- {0x00898, 0x0089f}, // Arabic Small High Word A..Arabic Half Madda Over M- {0x008ca, 0x008e1}, // Arabic Small High Farsi ..Arabic Small High Sign S- {0x008e3, 0x00902}, // Arabic Turned Damma Belo..Devanagari Sign Anusvara- {0x0093a, 0x0093a}, // Devanagari Vowel Sign 
Oe..Devanagari Vowel Sign Oe- {0x0093c, 0x0093c}, // Devanagari Sign Nukta ..Devanagari Sign Nukta- {0x00941, 0x00948}, // Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai- {0x0094d, 0x0094d}, // Devanagari Sign Virama ..Devanagari Sign Virama- {0x00951, 0x00957}, // Devanagari Stress Sign U..Devanagari Vowel Sign Uu- {0x00962, 0x00963}, // Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo- {0x00981, 0x00981}, // Bengali Sign Candrabindu..Bengali Sign Candrabindu- {0x009bc, 0x009bc}, // Bengali Sign Nukta ..Bengali Sign Nukta- {0x009c1, 0x009c4}, // Bengali Vowel Sign U ..Bengali Vowel Sign Vocal- {0x009cd, 0x009cd}, // Bengali Sign Virama ..Bengali Sign Virama- {0x009e2, 0x009e3}, // Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal- {0x009fe, 0x009fe}, // Bengali Sandhi Mark ..Bengali Sandhi Mark- {0x00a01, 0x00a02}, // Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi- {0x00a3c, 0x00a3c}, // Gurmukhi Sign Nukta ..Gurmukhi Sign Nukta- {0x00a41, 0x00a42}, // Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu- {0x00a47, 0x00a48}, // Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai- {0x00a4b, 0x00a4d}, // Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama- {0x00a51, 0x00a51}, // Gurmukhi Sign Udaat ..Gurmukhi Sign Udaat- {0x00a70, 0x00a71}, // Gurmukhi Tippi ..Gurmukhi Addak- {0x00a75, 0x00a75}, // Gurmukhi Sign Yakash ..Gurmukhi Sign Yakash- {0x00a81, 0x00a82}, // Gujarati Sign Candrabind..Gujarati Sign Anusvara- {0x00abc, 0x00abc}, // Gujarati Sign Nukta ..Gujarati Sign Nukta- {0x00ac1, 0x00ac5}, // Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand- {0x00ac7, 0x00ac8}, // Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai- {0x00acd, 0x00acd}, // Gujarati Sign Virama ..Gujarati Sign Virama- {0x00ae2, 0x00ae3}, // Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca- {0x00afa, 0x00aff}, // Gujarati Sign Sukun ..Gujarati Sign Two-circle- {0x00b01, 0x00b01}, // Oriya Sign Candrabindu ..Oriya Sign Candrabindu- {0x00b3c, 0x00b3c}, // Oriya Sign Nukta ..Oriya Sign Nukta- {0x00b3f, 
0x00b3f}, // Oriya Vowel Sign I ..Oriya Vowel Sign I- {0x00b41, 0x00b44}, // Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic- {0x00b4d, 0x00b4d}, // Oriya Sign Virama ..Oriya Sign Virama- {0x00b55, 0x00b56}, // Oriya Sign Overline ..Oriya Ai Length Mark- {0x00b62, 0x00b63}, // Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic- {0x00b82, 0x00b82}, // Tamil Sign Anusvara ..Tamil Sign Anusvara- {0x00bc0, 0x00bc0}, // Tamil Vowel Sign Ii ..Tamil Vowel Sign Ii- {0x00bcd, 0x00bcd}, // Tamil Sign Virama ..Tamil Sign Virama- {0x00c00, 0x00c00}, // Telugu Sign Combining Ca..Telugu Sign Combining Ca- {0x00c04, 0x00c04}, // Telugu Sign Combining An..Telugu Sign Combining An- {0x00c3c, 0x00c3c}, // Telugu Sign Nukta ..Telugu Sign Nukta- {0x00c3e, 0x00c40}, // Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii- {0x00c46, 0x00c48}, // Telugu Vowel Sign E ..Telugu Vowel Sign Ai- {0x00c4a, 0x00c4d}, // Telugu Vowel Sign O ..Telugu Sign Virama- {0x00c55, 0x00c56}, // Telugu Length Mark ..Telugu Ai Length Mark- {0x00c62, 0x00c63}, // Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali- {0x00c81, 0x00c81}, // Kannada Sign Candrabindu..Kannada Sign Candrabindu- {0x00cbc, 0x00cbc}, // Kannada Sign Nukta ..Kannada Sign Nukta- {0x00cbf, 0x00cbf}, // Kannada Vowel Sign I ..Kannada Vowel Sign I- {0x00cc6, 0x00cc6}, // Kannada Vowel Sign E ..Kannada Vowel Sign E- {0x00ccc, 0x00ccd}, // Kannada Vowel Sign Au ..Kannada Sign Virama- {0x00ce2, 0x00ce3}, // Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal- {0x00d00, 0x00d01}, // Malayalam Sign Combining..Malayalam Sign Candrabin- {0x00d3b, 0x00d3c}, // Malayalam Sign Vertical ..Malayalam Sign Circular- {0x00d41, 0x00d44}, // Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc- {0x00d4d, 0x00d4d}, // Malayalam Sign Virama ..Malayalam Sign Virama- {0x00d62, 0x00d63}, // Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc- {0x00d81, 0x00d81}, // Sinhala Sign Candrabindu..Sinhala Sign Candrabindu- {0x00dca, 0x00dca}, // Sinhala Sign Al-lakuna ..Sinhala Sign 
Al-lakuna- {0x00dd2, 0x00dd4}, // Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti- {0x00dd6, 0x00dd6}, // Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga- {0x00e31, 0x00e31}, // Thai Character Mai Han-a..Thai Character Mai Han-a- {0x00e34, 0x00e3a}, // Thai Character Sara I ..Thai Character Phinthu- {0x00e47, 0x00e4e}, // Thai Character Maitaikhu..Thai Character Yamakkan- {0x00eb1, 0x00eb1}, // Lao Vowel Sign Mai Kan ..Lao Vowel Sign Mai Kan- {0x00eb4, 0x00ebc}, // Lao Vowel Sign I ..Lao Semivowel Sign Lo- {0x00ec8, 0x00ece}, // Lao Tone Mai Ek ..(nil)- {0x00f18, 0x00f19}, // Tibetan Astrological Sig..Tibetan Astrological Sig- {0x00f35, 0x00f35}, // Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung- {0x00f37, 0x00f37}, // Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung- {0x00f39, 0x00f39}, // Tibetan Mark Tsa -phru ..Tibetan Mark Tsa -phru- {0x00f71, 0x00f7e}, // Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga- {0x00f80, 0x00f84}, // Tibetan Vowel Sign Rever..Tibetan Mark Halanta- {0x00f86, 0x00f87}, // Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags- {0x00f8d, 0x00f97}, // Tibetan Subjoined Sign L..Tibetan Subjoined Letter- {0x00f99, 0x00fbc}, // Tibetan Subjoined Letter..Tibetan Subjoined Letter- {0x00fc6, 0x00fc6}, // Tibetan Symbol Padma Gda..Tibetan Symbol Padma Gda- {0x0102d, 0x01030}, // Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu- {0x01032, 0x01037}, // Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below- {0x01039, 0x0103a}, // Myanmar Sign Virama ..Myanmar Sign Asat- {0x0103d, 0x0103e}, // Myanmar Consonant Sign M..Myanmar Consonant Sign M- {0x01058, 0x01059}, // Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal- {0x0105e, 0x01060}, // Myanmar Consonant Sign M..Myanmar Consonant Sign M- {0x01071, 0x01074}, // Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah- {0x01082, 0x01082}, // Myanmar Consonant Sign S..Myanmar Consonant Sign S- {0x01085, 0x01086}, // Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan- {0x0108d, 0x0108d}, // Myanmar Sign Shan 
Counci..Myanmar Sign Shan Counci- {0x0109d, 0x0109d}, // Myanmar Vowel Sign Aiton..Myanmar Vowel Sign Aiton- {0x0135d, 0x0135f}, // Ethiopic Combining Gemin..Ethiopic Combining Gemin- {0x01712, 0x01714}, // Tagalog Vowel Sign I ..Tagalog Sign Virama- {0x01732, 0x01733}, // Hanunoo Vowel Sign I ..Hanunoo Vowel Sign U- {0x01752, 0x01753}, // Buhid Vowel Sign I ..Buhid Vowel Sign U- {0x01772, 0x01773}, // Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U- {0x017b4, 0x017b5}, // Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa- {0x017b7, 0x017bd}, // Khmer Vowel Sign I ..Khmer Vowel Sign Ua- {0x017c6, 0x017c6}, // Khmer Sign Nikahit ..Khmer Sign Nikahit- {0x017c9, 0x017d3}, // Khmer Sign Muusikatoan ..Khmer Sign Bathamasat- {0x017dd, 0x017dd}, // Khmer Sign Atthacan ..Khmer Sign Atthacan- {0x0180b, 0x0180d}, // Mongolian Free Variation..Mongolian Free Variation- {0x0180f, 0x0180f}, // Mongolian Free Variation..Mongolian Free Variation- {0x01885, 0x01886}, // Mongolian Letter Ali Gal..Mongolian Letter Ali Gal- {0x018a9, 0x018a9}, // Mongolian Letter Ali Gal..Mongolian Letter Ali Gal- {0x01920, 0x01922}, // Limbu Vowel Sign A ..Limbu Vowel Sign U- {0x01927, 0x01928}, // Limbu Vowel Sign E ..Limbu Vowel Sign O- {0x01932, 0x01932}, // Limbu Small Letter Anusv..Limbu Small Letter Anusv- {0x01939, 0x0193b}, // Limbu Sign Mukphreng ..Limbu Sign Sa-i- {0x01a17, 0x01a18}, // Buginese Vowel Sign I ..Buginese Vowel Sign U- {0x01a1b, 0x01a1b}, // Buginese Vowel Sign Ae ..Buginese Vowel Sign Ae- {0x01a56, 0x01a56}, // Tai Tham Consonant Sign ..Tai Tham Consonant Sign- {0x01a58, 0x01a5e}, // Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign- {0x01a60, 0x01a60}, // Tai Tham Sign Sakot ..Tai Tham Sign Sakot- {0x01a62, 0x01a62}, // Tai Tham Vowel Sign Mai ..Tai Tham Vowel Sign Mai- {0x01a65, 0x01a6c}, // Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B- {0x01a73, 0x01a7c}, // Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue- {0x01a7f, 0x01a7f}, // Tai Tham Combining Crypt..Tai Tham 
Combining Crypt- {0x01ab0, 0x01ace}, // Combining Doubled Circum..Combining Latin Small Le- {0x01b00, 0x01b03}, // Balinese Sign Ulu Ricem ..Balinese Sign Surang- {0x01b34, 0x01b34}, // Balinese Sign Rerekan ..Balinese Sign Rerekan- {0x01b36, 0x01b3a}, // Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R- {0x01b3c, 0x01b3c}, // Balinese Vowel Sign La L..Balinese Vowel Sign La L- {0x01b42, 0x01b42}, // Balinese Vowel Sign Pepe..Balinese Vowel Sign Pepe- {0x01b6b, 0x01b73}, // Balinese Musical Symbol ..Balinese Musical Symbol- {0x01b80, 0x01b81}, // Sundanese Sign Panyecek ..Sundanese Sign Panglayar- {0x01ba2, 0x01ba5}, // Sundanese Consonant Sign..Sundanese Vowel Sign Pan- {0x01ba8, 0x01ba9}, // Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan- {0x01bab, 0x01bad}, // Sundanese Sign Virama ..Sundanese Consonant Sign- {0x01be6, 0x01be6}, // Batak Sign Tompi ..Batak Sign Tompi- {0x01be8, 0x01be9}, // Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee- {0x01bed, 0x01bed}, // Batak Vowel Sign Karo O ..Batak Vowel Sign Karo O- {0x01bef, 0x01bf1}, // Batak Vowel Sign U For S..Batak Consonant Sign H- {0x01c2c, 0x01c33}, // Lepcha Vowel Sign E ..Lepcha Consonant Sign T- {0x01c36, 0x01c37}, // Lepcha Sign Ran ..Lepcha Sign Nukta- {0x01cd0, 0x01cd2}, // Vedic Tone Karshana ..Vedic Tone Prenkha- {0x01cd4, 0x01ce0}, // Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash- {0x01ce2, 0x01ce8}, // Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda- {0x01ced, 0x01ced}, // Vedic Sign Tiryak ..Vedic Sign Tiryak- {0x01cf4, 0x01cf4}, // Vedic Tone Candra Above ..Vedic Tone Candra Above- {0x01cf8, 0x01cf9}, // Vedic Tone Ring Above ..Vedic Tone Double Ring A- {0x01dc0, 0x01dff}, // Combining Dotted Grave A..Combining Right Arrowhea- {0x020d0, 0x020f0}, // Combining Left Harpoon A..Combining Asterisk Above- {0x02cef, 0x02cf1}, // Coptic Combining Ni Abov..Coptic Combining Spiritu- {0x02d7f, 0x02d7f}, // Tifinagh Consonant Joine..Tifinagh Consonant Joine- {0x02de0, 0x02dff}, // Combining 
Cyrillic Lette..Combining Cyrillic Lette- {0x0302a, 0x0302d}, // Ideographic Level Tone M..Ideographic Entering Ton- {0x03099, 0x0309a}, // Combining Katakana-hirag..Combining Katakana-hirag- {0x0a66f, 0x0a672}, // Combining Cyrillic Vzmet..Combining Cyrillic Thous- {0x0a674, 0x0a67d}, // Combining Cyrillic Lette..Combining Cyrillic Payer- {0x0a69e, 0x0a69f}, // Combining Cyrillic Lette..Combining Cyrillic Lette- {0x0a6f0, 0x0a6f1}, // Bamum Combining Mark Koq..Bamum Combining Mark Tuk- {0x0a802, 0x0a802}, // Syloti Nagri Sign Dvisva..Syloti Nagri Sign Dvisva- {0x0a806, 0x0a806}, // Syloti Nagri Sign Hasant..Syloti Nagri Sign Hasant- {0x0a80b, 0x0a80b}, // Syloti Nagri Sign Anusva..Syloti Nagri Sign Anusva- {0x0a825, 0x0a826}, // Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign- {0x0a82c, 0x0a82c}, // Syloti Nagri Sign Altern..Syloti Nagri Sign Altern- {0x0a8c4, 0x0a8c5}, // Saurashtra Sign Virama ..Saurashtra Sign Candrabi- {0x0a8e0, 0x0a8f1}, // Combining Devanagari Dig..Combining Devanagari Sig- {0x0a8ff, 0x0a8ff}, // Devanagari Vowel Sign Ay..Devanagari Vowel Sign Ay- {0x0a926, 0x0a92d}, // Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop- {0x0a947, 0x0a951}, // Rejang Vowel Sign I ..Rejang Consonant Sign R- {0x0a980, 0x0a982}, // Javanese Sign Panyangga ..Javanese Sign Layar- {0x0a9b3, 0x0a9b3}, // Javanese Sign Cecak Telu..Javanese Sign Cecak Telu- {0x0a9b6, 0x0a9b9}, // Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku- {0x0a9bc, 0x0a9bd}, // Javanese Vowel Sign Pepe..Javanese Consonant Sign- {0x0a9e5, 0x0a9e5}, // Myanmar Sign Shan Saw ..Myanmar Sign Shan Saw- {0x0aa29, 0x0aa2e}, // Cham Vowel Sign Aa ..Cham Vowel Sign Oe- {0x0aa31, 0x0aa32}, // Cham Vowel Sign Au ..Cham Vowel Sign Ue- {0x0aa35, 0x0aa36}, // Cham Consonant Sign La ..Cham Consonant Sign Wa- {0x0aa43, 0x0aa43}, // Cham Consonant Sign Fina..Cham Consonant Sign Fina- {0x0aa4c, 0x0aa4c}, // Cham Consonant Sign Fina..Cham Consonant Sign Fina- {0x0aa7c, 0x0aa7c}, // Myanmar Sign Tai Laing T..Myanmar 
Sign Tai Laing T- {0x0aab0, 0x0aab0}, // Tai Viet Mai Kang ..Tai Viet Mai Kang- {0x0aab2, 0x0aab4}, // Tai Viet Vowel I ..Tai Viet Vowel U- {0x0aab7, 0x0aab8}, // Tai Viet Mai Khit ..Tai Viet Vowel Ia- {0x0aabe, 0x0aabf}, // Tai Viet Vowel Am ..Tai Viet Tone Mai Ek- {0x0aac1, 0x0aac1}, // Tai Viet Tone Mai Tho ..Tai Viet Tone Mai Tho- {0x0aaec, 0x0aaed}, // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign- {0x0aaf6, 0x0aaf6}, // Meetei Mayek Virama ..Meetei Mayek Virama- {0x0abe5, 0x0abe5}, // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign- {0x0abe8, 0x0abe8}, // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign- {0x0abed, 0x0abed}, // Meetei Mayek Apun Iyek ..Meetei Mayek Apun Iyek- {0x0fb1e, 0x0fb1e}, // Hebrew Point Judeo-spani..Hebrew Point Judeo-spani- {0x0fe00, 0x0fe0f}, // Variation Selector-1 ..Variation Selector-16- {0x0fe20, 0x0fe2f}, // Combining Ligature Left ..Combining Cyrillic Titlo- {0x101fd, 0x101fd}, // Phaistos Disc Sign Combi..Phaistos Disc Sign Combi- {0x102e0, 0x102e0}, // Coptic Epact Thousands M..Coptic Epact Thousands M- {0x10376, 0x1037a}, // Combining Old Permic Let..Combining Old Permic Let- {0x10a01, 0x10a03}, // Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo- {0x10a05, 0x10a06}, // Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O- {0x10a0c, 0x10a0f}, // Kharoshthi Vowel Length ..Kharoshthi Sign Visarga- {0x10a38, 0x10a3a}, // Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo- {0x10a3f, 0x10a3f}, // Kharoshthi Virama ..Kharoshthi Virama- {0x10ae5, 0x10ae6}, // Manichaean Abbreviation ..Manichaean Abbreviation- {0x10d24, 0x10d27}, // Hanifi Rohingya Sign Har..Hanifi Rohingya Sign Tas- {0x10eab, 0x10eac}, // Yezidi Combining Hamza M..Yezidi Combining Madda M- {0x10efd, 0x10eff}, // (nil) ..(nil)- {0x10f46, 0x10f50}, // Sogdian Combining Dot Be..Sogdian Combining Stroke- {0x10f82, 0x10f85}, // Old Uyghur Combining Dot..Old Uyghur Combining Two- {0x11001, 0x11001}, // Brahmi Sign Anusvara ..Brahmi Sign Anusvara- {0x11038, 
0x11046}, // Brahmi Vowel Sign Aa ..Brahmi Virama- {0x11070, 0x11070}, // Brahmi Sign Old Tamil Vi..Brahmi Sign Old Tamil Vi- {0x11073, 0x11074}, // Brahmi Vowel Sign Old Ta..Brahmi Vowel Sign Old Ta- {0x1107f, 0x11081}, // Brahmi Number Joiner ..Kaithi Sign Anusvara- {0x110b3, 0x110b6}, // Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai- {0x110b9, 0x110ba}, // Kaithi Sign Virama ..Kaithi Sign Nukta- {0x110c2, 0x110c2}, // Kaithi Vowel Sign Vocali..Kaithi Vowel Sign Vocali- {0x11100, 0x11102}, // Chakma Sign Candrabindu ..Chakma Sign Visarga- {0x11127, 0x1112b}, // Chakma Vowel Sign A ..Chakma Vowel Sign Uu- {0x1112d, 0x11134}, // Chakma Vowel Sign Ai ..Chakma Maayyaa- {0x11173, 0x11173}, // Mahajani Sign Nukta ..Mahajani Sign Nukta- {0x11180, 0x11181}, // Sharada Sign Candrabindu..Sharada Sign Anusvara- {0x111b6, 0x111be}, // Sharada Vowel Sign U ..Sharada Vowel Sign O- {0x111c9, 0x111cc}, // Sharada Sandhi Mark ..Sharada Extra Short Vowe- {0x111cf, 0x111cf}, // Sharada Sign Inverted Ca..Sharada Sign Inverted Ca- {0x1122f, 0x11231}, // Khojki Vowel Sign U ..Khojki Vowel Sign Ai- {0x11234, 0x11234}, // Khojki Sign Anusvara ..Khojki Sign Anusvara- {0x11236, 0x11237}, // Khojki Sign Nukta ..Khojki Sign Shadda- {0x1123e, 0x1123e}, // Khojki Sign Sukun ..Khojki Sign Sukun- {0x11241, 0x11241}, // (nil) ..(nil)- {0x112df, 0x112df}, // Khudawadi Sign Anusvara ..Khudawadi Sign Anusvara- {0x112e3, 0x112ea}, // Khudawadi Vowel Sign U ..Khudawadi Sign Virama- {0x11300, 0x11301}, // Grantha Sign Combining A..Grantha Sign Candrabindu- {0x1133b, 0x1133c}, // Combining Bindu Below ..Grantha Sign Nukta- {0x11340, 0x11340}, // Grantha Vowel Sign Ii ..Grantha Vowel Sign Ii- {0x11366, 0x1136c}, // Combining Grantha Digit ..Combining Grantha Digit- {0x11370, 0x11374}, // Combining Grantha Letter..Combining Grantha Letter- {0x11438, 0x1143f}, // Newa Vowel Sign U ..Newa Vowel Sign Ai- {0x11442, 0x11444}, // Newa Sign Virama ..Newa Sign Anusvara- {0x11446, 0x11446}, // Newa Sign Nukta ..Newa 
Sign Nukta- {0x1145e, 0x1145e}, // Newa Sandhi Mark ..Newa Sandhi Mark- {0x114b3, 0x114b8}, // Tirhuta Vowel Sign U ..Tirhuta Vowel Sign Vocal- {0x114ba, 0x114ba}, // Tirhuta Vowel Sign Short..Tirhuta Vowel Sign Short- {0x114bf, 0x114c0}, // Tirhuta Sign Candrabindu..Tirhuta Sign Anusvara- {0x114c2, 0x114c3}, // Tirhuta Sign Virama ..Tirhuta Sign Nukta- {0x115b2, 0x115b5}, // Siddham Vowel Sign U ..Siddham Vowel Sign Vocal- {0x115bc, 0x115bd}, // Siddham Sign Candrabindu..Siddham Sign Anusvara- {0x115bf, 0x115c0}, // Siddham Sign Virama ..Siddham Sign Nukta- {0x115dc, 0x115dd}, // Siddham Vowel Sign Alter..Siddham Vowel Sign Alter- {0x11633, 0x1163a}, // Modi Vowel Sign U ..Modi Vowel Sign Ai- {0x1163d, 0x1163d}, // Modi Sign Anusvara ..Modi Sign Anusvara- {0x1163f, 0x11640}, // Modi Sign Virama ..Modi Sign Ardhacandra- {0x116ab, 0x116ab}, // Takri Sign Anusvara ..Takri Sign Anusvara- {0x116ad, 0x116ad}, // Takri Vowel Sign Aa ..Takri Vowel Sign Aa- {0x116b0, 0x116b5}, // Takri Vowel Sign U ..Takri Vowel Sign Au- {0x116b7, 0x116b7}, // Takri Sign Nukta ..Takri Sign Nukta- {0x1171d, 0x1171f}, // Ahom Consonant Sign Medi..Ahom Consonant Sign Medi- {0x11722, 0x11725}, // Ahom Vowel Sign I ..Ahom Vowel Sign Uu- {0x11727, 0x1172b}, // Ahom Vowel Sign Aw ..Ahom Sign Killer- {0x1182f, 0x11837}, // Dogra Vowel Sign U ..Dogra Sign Anusvara- {0x11839, 0x1183a}, // Dogra Sign Virama ..Dogra Sign Nukta- {0x1193b, 0x1193c}, // Dives Akuru Sign Anusvar..Dives Akuru Sign Candrab- {0x1193e, 0x1193e}, // Dives Akuru Virama ..Dives Akuru Virama- {0x11943, 0x11943}, // Dives Akuru Sign Nukta ..Dives Akuru Sign Nukta- {0x119d4, 0x119d7}, // Nandinagari Vowel Sign U..Nandinagari Vowel Sign V- {0x119da, 0x119db}, // Nandinagari Vowel Sign E..Nandinagari Vowel Sign A- {0x119e0, 0x119e0}, // Nandinagari Sign Virama ..Nandinagari Sign Virama- {0x11a01, 0x11a0a}, // Zanabazar Square Vowel S..Zanabazar Square Vowel L- {0x11a33, 0x11a38}, // Zanabazar Square Final C..Zanabazar Square Sign An- 
{0x11a3b, 0x11a3e}, // Zanabazar Square Cluster..Zanabazar Square Cluster- {0x11a47, 0x11a47}, // Zanabazar Square Subjoin..Zanabazar Square Subjoin- {0x11a51, 0x11a56}, // Soyombo Vowel Sign I ..Soyombo Vowel Sign Oe- {0x11a59, 0x11a5b}, // Soyombo Vowel Sign Vocal..Soyombo Vowel Length Mar- {0x11a8a, 0x11a96}, // Soyombo Final Consonant ..Soyombo Sign Anusvara- {0x11a98, 0x11a99}, // Soyombo Gemination Mark ..Soyombo Subjoiner- {0x11c30, 0x11c36}, // Bhaiksuki Vowel Sign I ..Bhaiksuki Vowel Sign Voc- {0x11c38, 0x11c3d}, // Bhaiksuki Vowel Sign E ..Bhaiksuki Sign Anusvara- {0x11c3f, 0x11c3f}, // Bhaiksuki Sign Virama ..Bhaiksuki Sign Virama- {0x11c92, 0x11ca7}, // Marchen Subjoined Letter..Marchen Subjoined Letter- {0x11caa, 0x11cb0}, // Marchen Subjoined Letter..Marchen Vowel Sign Aa- {0x11cb2, 0x11cb3}, // Marchen Vowel Sign U ..Marchen Vowel Sign E- {0x11cb5, 0x11cb6}, // Marchen Sign Anusvara ..Marchen Sign Candrabindu- {0x11d31, 0x11d36}, // Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign- {0x11d3a, 0x11d3a}, // Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign- {0x11d3c, 0x11d3d}, // Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign- {0x11d3f, 0x11d45}, // Masaram Gondi Vowel Sign..Masaram Gondi Virama- {0x11d47, 0x11d47}, // Masaram Gondi Ra-kara ..Masaram Gondi Ra-kara- {0x11d90, 0x11d91}, // Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign- {0x11d95, 0x11d95}, // Gunjala Gondi Sign Anusv..Gunjala Gondi Sign Anusv- {0x11d97, 0x11d97}, // Gunjala Gondi Virama ..Gunjala Gondi Virama- {0x11ef3, 0x11ef4}, // Makasar Vowel Sign I ..Makasar Vowel Sign U- {0x11f00, 0x11f01}, // (nil) ..(nil)- {0x11f36, 0x11f3a}, // (nil) ..(nil)- {0x11f40, 0x11f40}, // (nil) ..(nil)- {0x11f42, 0x11f42}, // (nil) ..(nil)- {0x13440, 0x13440}, // (nil) ..(nil)- {0x13447, 0x13455}, // (nil) ..(nil)- {0x16af0, 0x16af4}, // Bassa Vah Combining High..Bassa Vah Combining High- {0x16b30, 0x16b36}, // Pahawh Hmong Mark Cim Tu..Pahawh Hmong Mark Cim Ta- {0x16f4f, 0x16f4f}, // Miao 
Sign Consonant Modi..Miao Sign Consonant Modi- {0x16f8f, 0x16f92}, // Miao Tone Right ..Miao Tone Below- {0x16fe4, 0x16fe4}, // Khitan Small Script Fill..Khitan Small Script Fill- {0x1bc9d, 0x1bc9e}, // Duployan Thick Letter Se..Duployan Double Mark- {0x1cf00, 0x1cf2d}, // Znamenny Combining Mark ..Znamenny Combining Mark- {0x1cf30, 0x1cf46}, // Znamenny Combining Tonal..Znamenny Priznak Modifie- {0x1d167, 0x1d169}, // Musical Symbol Combining..Musical Symbol Combining- {0x1d17b, 0x1d182}, // Musical Symbol Combining..Musical Symbol Combining- {0x1d185, 0x1d18b}, // Musical Symbol Combining..Musical Symbol Combining- {0x1d1aa, 0x1d1ad}, // Musical Symbol Combining..Musical Symbol Combining- {0x1d242, 0x1d244}, // Combining Greek Musical ..Combining Greek Musical- {0x1da00, 0x1da36}, // Signwriting Head Rim ..Signwriting Air Sucking- {0x1da3b, 0x1da6c}, // Signwriting Mouth Closed..Signwriting Excitement- {0x1da75, 0x1da75}, // Signwriting Upper Body T..Signwriting Upper Body T- {0x1da84, 0x1da84}, // Signwriting Location Hea..Signwriting Location Hea- {0x1da9b, 0x1da9f}, // Signwriting Fill Modifie..Signwriting Fill Modifie- {0x1daa1, 0x1daaf}, // Signwriting Rotation Mod..Signwriting Rotation Mod- {0x1e000, 0x1e006}, // Combining Glagolitic Let..Combining Glagolitic Let- {0x1e008, 0x1e018}, // Combining Glagolitic Let..Combining Glagolitic Let- {0x1e01b, 0x1e021}, // Combining Glagolitic Let..Combining Glagolitic Let- {0x1e023, 0x1e024}, // Combining Glagolitic Let..Combining Glagolitic Let- {0x1e026, 0x1e02a}, // Combining Glagolitic Let..Combining Glagolitic Let- {0x1e08f, 0x1e08f}, // (nil) ..(nil)- {0x1e130, 0x1e136}, // Nyiakeng Puachue Hmong T..Nyiakeng Puachue Hmong T- {0x1e2ae, 0x1e2ae}, // Toto Sign Rising Tone ..Toto Sign Rising Tone- {0x1e2ec, 0x1e2ef}, // Wancho Tone Tup ..Wancho Tone Koini- {0x1e4ec, 0x1e4ef}, // (nil) ..(nil)- {0x1e8d0, 0x1e8d6}, // Mende Kikakui Combining ..Mende Kikakui Combining- {0x1e944, 0x1e94a}, // Adlam Alif Lengthener 
..Adlam Nukta- {0xe0100, 0xe01ef}, // Variation Selector-17 ..Variation Selector-256+ {0x00300, 0x0036f}, // Combining Grave Accent ..Combining Latin Small Le+ {0x00483, 0x00489}, // Combining Cyrillic Titlo..Combining Cyrillic Milli+ {0x00591, 0x005bd}, // Hebrew Accent Etnahta ..Hebrew Point Meteg+ {0x005bf, 0x005bf}, // Hebrew Point Rafe ..Hebrew Point Rafe+ {0x005c1, 0x005c2}, // Hebrew Point Shin Dot ..Hebrew Point Sin Dot+ {0x005c4, 0x005c5}, // Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot+ {0x005c7, 0x005c7}, // Hebrew Point Qamats Qata..Hebrew Point Qamats Qata+ {0x00610, 0x0061a}, // Arabic Sign Sallallahou ..Arabic Small Kasra+ {0x0064b, 0x0065f}, // Arabic Fathatan ..Arabic Wavy Hamza Below+ {0x00670, 0x00670}, // Arabic Letter Superscrip..Arabic Letter Superscrip+ {0x006d6, 0x006dc}, // Arabic Small High Ligatu..Arabic Small High Seen+ {0x006df, 0x006e4}, // Arabic Small High Rounde..Arabic Small High Madda+ {0x006e7, 0x006e8}, // Arabic Small High Yeh ..Arabic Small High Noon+ {0x006ea, 0x006ed}, // Arabic Empty Centre Low ..Arabic Small Low Meem+ {0x00711, 0x00711}, // Syriac Letter Superscrip..Syriac Letter Superscrip+ {0x00730, 0x0074a}, // Syriac Pthaha Above ..Syriac Barrekh+ {0x007a6, 0x007b0}, // Thaana Abafili ..Thaana Sukun+ {0x007eb, 0x007f3}, // Nko Combining Short High..Nko Combining Double Dot+ {0x007fd, 0x007fd}, // Nko Dantayalan ..Nko Dantayalan+ {0x00816, 0x00819}, // Samaritan Mark In ..Samaritan Mark Dagesh+ {0x0081b, 0x00823}, // Samaritan Mark Epentheti..Samaritan Vowel Sign A+ {0x00825, 0x00827}, // Samaritan Vowel Sign Sho..Samaritan Vowel Sign U+ {0x00829, 0x0082d}, // Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa+ {0x00859, 0x0085b}, // Mandaic Affrication Mark..Mandaic Gemination Mark+ {0x00898, 0x0089f}, // Arabic Small High Word A..Arabic Half Madda Over M+ {0x008ca, 0x008e1}, // Arabic Small High Farsi ..Arabic Small High Sign S+ {0x008e3, 0x00902}, // Arabic Turned Damma Belo..Devanagari Sign Anusvara+ {0x0093a, 
0x0093a}, // Devanagari Vowel Sign Oe..Devanagari Vowel Sign Oe+ {0x0093c, 0x0093c}, // Devanagari Sign Nukta ..Devanagari Sign Nukta+ {0x00941, 0x00948}, // Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai+ {0x0094d, 0x0094d}, // Devanagari Sign Virama ..Devanagari Sign Virama+ {0x00951, 0x00957}, // Devanagari Stress Sign U..Devanagari Vowel Sign Uu+ {0x00962, 0x00963}, // Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo+ {0x00981, 0x00981}, // Bengali Sign Candrabindu..Bengali Sign Candrabindu+ {0x009bc, 0x009bc}, // Bengali Sign Nukta ..Bengali Sign Nukta+ {0x009c1, 0x009c4}, // Bengali Vowel Sign U ..Bengali Vowel Sign Vocal+ {0x009cd, 0x009cd}, // Bengali Sign Virama ..Bengali Sign Virama+ {0x009e2, 0x009e3}, // Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal+ {0x009fe, 0x009fe}, // Bengali Sandhi Mark ..Bengali Sandhi Mark+ {0x00a01, 0x00a02}, // Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi+ {0x00a3c, 0x00a3c}, // Gurmukhi Sign Nukta ..Gurmukhi Sign Nukta+ {0x00a41, 0x00a42}, // Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu+ {0x00a47, 0x00a48}, // Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai+ {0x00a4b, 0x00a4d}, // Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama+ {0x00a51, 0x00a51}, // Gurmukhi Sign Udaat ..Gurmukhi Sign Udaat+ {0x00a70, 0x00a71}, // Gurmukhi Tippi ..Gurmukhi Addak+ {0x00a75, 0x00a75}, // Gurmukhi Sign Yakash ..Gurmukhi Sign Yakash+ {0x00a81, 0x00a82}, // Gujarati Sign Candrabind..Gujarati Sign Anusvara+ {0x00abc, 0x00abc}, // Gujarati Sign Nukta ..Gujarati Sign Nukta+ {0x00ac1, 0x00ac5}, // Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand+ {0x00ac7, 0x00ac8}, // Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai+ {0x00acd, 0x00acd}, // Gujarati Sign Virama ..Gujarati Sign Virama+ {0x00ae2, 0x00ae3}, // Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca+ {0x00afa, 0x00aff}, // Gujarati Sign Sukun ..Gujarati Sign Two-circle+ {0x00b01, 0x00b01}, // Oriya Sign Candrabindu ..Oriya Sign Candrabindu+ {0x00b3c, 0x00b3c}, // Oriya Sign Nukta 
..Oriya Sign Nukta+ {0x00b3f, 0x00b3f}, // Oriya Vowel Sign I ..Oriya Vowel Sign I+ {0x00b41, 0x00b44}, // Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic+ {0x00b4d, 0x00b4d}, // Oriya Sign Virama ..Oriya Sign Virama+ {0x00b55, 0x00b56}, // Oriya Sign Overline ..Oriya Ai Length Mark+ {0x00b62, 0x00b63}, // Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic+ {0x00b82, 0x00b82}, // Tamil Sign Anusvara ..Tamil Sign Anusvara+ {0x00bc0, 0x00bc0}, // Tamil Vowel Sign Ii ..Tamil Vowel Sign Ii+ {0x00bcd, 0x00bcd}, // Tamil Sign Virama ..Tamil Sign Virama+ {0x00c00, 0x00c00}, // Telugu Sign Combining Ca..Telugu Sign Combining Ca+ {0x00c04, 0x00c04}, // Telugu Sign Combining An..Telugu Sign Combining An+ {0x00c3c, 0x00c3c}, // Telugu Sign Nukta ..Telugu Sign Nukta+ {0x00c3e, 0x00c40}, // Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii+ {0x00c46, 0x00c48}, // Telugu Vowel Sign E ..Telugu Vowel Sign Ai+ {0x00c4a, 0x00c4d}, // Telugu Vowel Sign O ..Telugu Sign Virama+ {0x00c55, 0x00c56}, // Telugu Length Mark ..Telugu Ai Length Mark+ {0x00c62, 0x00c63}, // Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali+ {0x00c81, 0x00c81}, // Kannada Sign Candrabindu..Kannada Sign Candrabindu+ {0x00cbc, 0x00cbc}, // Kannada Sign Nukta ..Kannada Sign Nukta+ {0x00cbf, 0x00cbf}, // Kannada Vowel Sign I ..Kannada Vowel Sign I+ {0x00cc6, 0x00cc6}, // Kannada Vowel Sign E ..Kannada Vowel Sign E+ {0x00ccc, 0x00ccd}, // Kannada Vowel Sign Au ..Kannada Sign Virama+ {0x00ce2, 0x00ce3}, // Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal+ {0x00d00, 0x00d01}, // Malayalam Sign Combining..Malayalam Sign Candrabin+ {0x00d3b, 0x00d3c}, // Malayalam Sign Vertical ..Malayalam Sign Circular+ {0x00d41, 0x00d44}, // Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc+ {0x00d4d, 0x00d4d}, // Malayalam Sign Virama ..Malayalam Sign Virama+ {0x00d62, 0x00d63}, // Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc+ {0x00d81, 0x00d81}, // Sinhala Sign Candrabindu..Sinhala Sign Candrabindu+ {0x00dca, 0x00dca}, // Sinhala 
Sign Al-lakuna ..Sinhala Sign Al-lakuna+ {0x00dd2, 0x00dd4}, // Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti+ {0x00dd6, 0x00dd6}, // Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga+ {0x00e31, 0x00e31}, // Thai Character Mai Han-a..Thai Character Mai Han-a+ {0x00e34, 0x00e3a}, // Thai Character Sara I ..Thai Character Phinthu+ {0x00e47, 0x00e4e}, // Thai Character Maitaikhu..Thai Character Yamakkan+ {0x00eb1, 0x00eb1}, // Lao Vowel Sign Mai Kan ..Lao Vowel Sign Mai Kan+ {0x00eb4, 0x00ebc}, // Lao Vowel Sign I ..Lao Semivowel Sign Lo+ {0x00ec8, 0x00ece}, // Lao Tone Mai Ek ..(nil)+ {0x00f18, 0x00f19}, // Tibetan Astrological Sig..Tibetan Astrological Sig+ {0x00f35, 0x00f35}, // Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung+ {0x00f37, 0x00f37}, // Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung+ {0x00f39, 0x00f39}, // Tibetan Mark Tsa -phru ..Tibetan Mark Tsa -phru+ {0x00f71, 0x00f7e}, // Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga+ {0x00f80, 0x00f84}, // Tibetan Vowel Sign Rever..Tibetan Mark Halanta+ {0x00f86, 0x00f87}, // Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags+ {0x00f8d, 0x00f97}, // Tibetan Subjoined Sign L..Tibetan Subjoined Letter+ {0x00f99, 0x00fbc}, // Tibetan Subjoined Letter..Tibetan Subjoined Letter+ {0x00fc6, 0x00fc6}, // Tibetan Symbol Padma Gda..Tibetan Symbol Padma Gda+ {0x0102d, 0x01030}, // Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu+ {0x01032, 0x01037}, // Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below+ {0x01039, 0x0103a}, // Myanmar Sign Virama ..Myanmar Sign Asat+ {0x0103d, 0x0103e}, // Myanmar Consonant Sign M..Myanmar Consonant Sign M+ {0x01058, 0x01059}, // Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal+ {0x0105e, 0x01060}, // Myanmar Consonant Sign M..Myanmar Consonant Sign M+ {0x01071, 0x01074}, // Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah+ {0x01082, 0x01082}, // Myanmar Consonant Sign S..Myanmar Consonant Sign S+ {0x01085, 0x01086}, // Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan+ {0x0108d, 
0x0108d}, // Myanmar Sign Shan Counci..Myanmar Sign Shan Counci+ {0x0109d, 0x0109d}, // Myanmar Vowel Sign Aiton..Myanmar Vowel Sign Aiton+ {0x0135d, 0x0135f}, // Ethiopic Combining Gemin..Ethiopic Combining Gemin+ {0x01712, 0x01714}, // Tagalog Vowel Sign I ..Tagalog Sign Virama+ {0x01732, 0x01733}, // Hanunoo Vowel Sign I ..Hanunoo Vowel Sign U+ {0x01752, 0x01753}, // Buhid Vowel Sign I ..Buhid Vowel Sign U+ {0x01772, 0x01773}, // Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U+ {0x017b4, 0x017b5}, // Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa+ {0x017b7, 0x017bd}, // Khmer Vowel Sign I ..Khmer Vowel Sign Ua+ {0x017c6, 0x017c6}, // Khmer Sign Nikahit ..Khmer Sign Nikahit+ {0x017c9, 0x017d3}, // Khmer Sign Muusikatoan ..Khmer Sign Bathamasat+ {0x017dd, 0x017dd}, // Khmer Sign Atthacan ..Khmer Sign Atthacan+ {0x0180b, 0x0180d}, // Mongolian Free Variation..Mongolian Free Variation+ {0x0180f, 0x0180f}, // Mongolian Free Variation..Mongolian Free Variation+ {0x01885, 0x01886}, // Mongolian Letter Ali Gal..Mongolian Letter Ali Gal+ {0x018a9, 0x018a9}, // Mongolian Letter Ali Gal..Mongolian Letter Ali Gal+ {0x01920, 0x01922}, // Limbu Vowel Sign A ..Limbu Vowel Sign U+ {0x01927, 0x01928}, // Limbu Vowel Sign E ..Limbu Vowel Sign O+ {0x01932, 0x01932}, // Limbu Small Letter Anusv..Limbu Small Letter Anusv+ {0x01939, 0x0193b}, // Limbu Sign Mukphreng ..Limbu Sign Sa-i+ {0x01a17, 0x01a18}, // Buginese Vowel Sign I ..Buginese Vowel Sign U+ {0x01a1b, 0x01a1b}, // Buginese Vowel Sign Ae ..Buginese Vowel Sign Ae+ {0x01a56, 0x01a56}, // Tai Tham Consonant Sign ..Tai Tham Consonant Sign+ {0x01a58, 0x01a5e}, // Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign+ {0x01a60, 0x01a60}, // Tai Tham Sign Sakot ..Tai Tham Sign Sakot+ {0x01a62, 0x01a62}, // Tai Tham Vowel Sign Mai ..Tai Tham Vowel Sign Mai+ {0x01a65, 0x01a6c}, // Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B+ {0x01a73, 0x01a7c}, // Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue+ {0x01a7f, 0x01a7f}, // Tai Tham 
Combining Crypt..Tai Tham Combining Crypt+ {0x01ab0, 0x01ace}, // Combining Doubled Circum..Combining Latin Small Le+ {0x01b00, 0x01b03}, // Balinese Sign Ulu Ricem ..Balinese Sign Surang+ {0x01b34, 0x01b34}, // Balinese Sign Rerekan ..Balinese Sign Rerekan+ {0x01b36, 0x01b3a}, // Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R+ {0x01b3c, 0x01b3c}, // Balinese Vowel Sign La L..Balinese Vowel Sign La L+ {0x01b42, 0x01b42}, // Balinese Vowel Sign Pepe..Balinese Vowel Sign Pepe+ {0x01b6b, 0x01b73}, // Balinese Musical Symbol ..Balinese Musical Symbol+ {0x01b80, 0x01b81}, // Sundanese Sign Panyecek ..Sundanese Sign Panglayar+ {0x01ba2, 0x01ba5}, // Sundanese Consonant Sign..Sundanese Vowel Sign Pan+ {0x01ba8, 0x01ba9}, // Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan+ {0x01bab, 0x01bad}, // Sundanese Sign Virama ..Sundanese Consonant Sign+ {0x01be6, 0x01be6}, // Batak Sign Tompi ..Batak Sign Tompi+ {0x01be8, 0x01be9}, // Batak Vowel Sign Pakpak ..Batak Vowel Sign Ee+ {0x01bed, 0x01bed}, // Batak Vowel Sign Karo O ..Batak Vowel Sign Karo O+ {0x01bef, 0x01bf1}, // Batak Vowel Sign U For S..Batak Consonant Sign H+ {0x01c2c, 0x01c33}, // Lepcha Vowel Sign E ..Lepcha Consonant Sign T+ {0x01c36, 0x01c37}, // Lepcha Sign Ran ..Lepcha Sign Nukta+ {0x01cd0, 0x01cd2}, // Vedic Tone Karshana ..Vedic Tone Prenkha+ {0x01cd4, 0x01ce0}, // Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash+ {0x01ce2, 0x01ce8}, // Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda+ {0x01ced, 0x01ced}, // Vedic Sign Tiryak ..Vedic Sign Tiryak+ {0x01cf4, 0x01cf4}, // Vedic Tone Candra Above ..Vedic Tone Candra Above+ {0x01cf8, 0x01cf9}, // Vedic Tone Ring Above ..Vedic Tone Double Ring A+ {0x01dc0, 0x01dff}, // Combining Dotted Grave A..Combining Right Arrowhea+ {0x020d0, 0x020f0}, // Combining Left Harpoon A..Combining Asterisk Above+ {0x02cef, 0x02cf1}, // Coptic Combining Ni Abov..Coptic Combining Spiritu+ {0x02d7f, 0x02d7f}, // Tifinagh Consonant Joine..Tifinagh Consonant Joine+ {0x02de0, 
0x02dff}, // Combining Cyrillic Lette..Combining Cyrillic Lette+ {0x0302a, 0x0302d}, // Ideographic Level Tone M..Ideographic Entering Ton+ {0x03099, 0x0309a}, // Combining Katakana-hirag..Combining Katakana-hirag+ {0x0a66f, 0x0a672}, // Combining Cyrillic Vzmet..Combining Cyrillic Thous+ {0x0a674, 0x0a67d}, // Combining Cyrillic Lette..Combining Cyrillic Payer+ {0x0a69e, 0x0a69f}, // Combining Cyrillic Lette..Combining Cyrillic Lette+ {0x0a6f0, 0x0a6f1}, // Bamum Combining Mark Koq..Bamum Combining Mark Tuk+ {0x0a802, 0x0a802}, // Syloti Nagri Sign Dvisva..Syloti Nagri Sign Dvisva+ {0x0a806, 0x0a806}, // Syloti Nagri Sign Hasant..Syloti Nagri Sign Hasant+ {0x0a80b, 0x0a80b}, // Syloti Nagri Sign Anusva..Syloti Nagri Sign Anusva+ {0x0a825, 0x0a826}, // Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign+ {0x0a82c, 0x0a82c}, // Syloti Nagri Sign Altern..Syloti Nagri Sign Altern+ {0x0a8c4, 0x0a8c5}, // Saurashtra Sign Virama ..Saurashtra Sign Candrabi+ {0x0a8e0, 0x0a8f1}, // Combining Devanagari Dig..Combining Devanagari Sig+ {0x0a8ff, 0x0a8ff}, // Devanagari Vowel Sign Ay..Devanagari Vowel Sign Ay+ {0x0a926, 0x0a92d}, // Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop+ {0x0a947, 0x0a951}, // Rejang Vowel Sign I ..Rejang Consonant Sign R+ {0x0a980, 0x0a982}, // Javanese Sign Panyangga ..Javanese Sign Layar+ {0x0a9b3, 0x0a9b3}, // Javanese Sign Cecak Telu..Javanese Sign Cecak Telu+ {0x0a9b6, 0x0a9b9}, // Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku+ {0x0a9bc, 0x0a9bd}, // Javanese Vowel Sign Pepe..Javanese Consonant Sign+ {0x0a9e5, 0x0a9e5}, // Myanmar Sign Shan Saw ..Myanmar Sign Shan Saw+ {0x0aa29, 0x0aa2e}, // Cham Vowel Sign Aa ..Cham Vowel Sign Oe+ {0x0aa31, 0x0aa32}, // Cham Vowel Sign Au ..Cham Vowel Sign Ue+ {0x0aa35, 0x0aa36}, // Cham Consonant Sign La ..Cham Consonant Sign Wa+ {0x0aa43, 0x0aa43}, // Cham Consonant Sign Fina..Cham Consonant Sign Fina+ {0x0aa4c, 0x0aa4c}, // Cham Consonant Sign Fina..Cham Consonant Sign Fina+ {0x0aa7c, 0x0aa7c}, // Myanmar 
Sign Tai Laing T..Myanmar Sign Tai Laing T+ {0x0aab0, 0x0aab0}, // Tai Viet Mai Kang ..Tai Viet Mai Kang+ {0x0aab2, 0x0aab4}, // Tai Viet Vowel I ..Tai Viet Vowel U+ {0x0aab7, 0x0aab8}, // Tai Viet Mai Khit ..Tai Viet Vowel Ia+ {0x0aabe, 0x0aabf}, // Tai Viet Vowel Am ..Tai Viet Tone Mai Ek+ {0x0aac1, 0x0aac1}, // Tai Viet Tone Mai Tho ..Tai Viet Tone Mai Tho+ {0x0aaec, 0x0aaed}, // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign+ {0x0aaf6, 0x0aaf6}, // Meetei Mayek Virama ..Meetei Mayek Virama+ {0x0abe5, 0x0abe5}, // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign+ {0x0abe8, 0x0abe8}, // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign+ {0x0abed, 0x0abed}, // Meetei Mayek Apun Iyek ..Meetei Mayek Apun Iyek+ {0x0fb1e, 0x0fb1e}, // Hebrew Point Judeo-spani..Hebrew Point Judeo-spani+ {0x0fe00, 0x0fe0f}, // Variation Selector-1 ..Variation Selector-16+ {0x0fe20, 0x0fe2f}, // Combining Ligature Left ..Combining Cyrillic Titlo+ {0x101fd, 0x101fd}, // Phaistos Disc Sign Combi..Phaistos Disc Sign Combi+ {0x102e0, 0x102e0}, // Coptic Epact Thousands M..Coptic Epact Thousands M+ {0x10376, 0x1037a}, // Combining Old Permic Let..Combining Old Permic Let+ {0x10a01, 0x10a03}, // Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo+ {0x10a05, 0x10a06}, // Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O+ {0x10a0c, 0x10a0f}, // Kharoshthi Vowel Length ..Kharoshthi Sign Visarga+ {0x10a38, 0x10a3a}, // Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo+ {0x10a3f, 0x10a3f}, // Kharoshthi Virama ..Kharoshthi Virama+ {0x10ae5, 0x10ae6}, // Manichaean Abbreviation ..Manichaean Abbreviation+ {0x10d24, 0x10d27}, // Hanifi Rohingya Sign Har..Hanifi Rohingya Sign Tas+ {0x10eab, 0x10eac}, // Yezidi Combining Hamza M..Yezidi Combining Madda M+ {0x10efd, 0x10eff}, // (nil) ..(nil)+ {0x10f46, 0x10f50}, // Sogdian Combining Dot Be..Sogdian Combining Stroke+ {0x10f82, 0x10f85}, // Old Uyghur Combining Dot..Old Uyghur Combining Two+ {0x11001, 0x11001}, // Brahmi Sign Anusvara ..Brahmi Sign 
Anusvara+ {0x11038, 0x11046}, // Brahmi Vowel Sign Aa ..Brahmi Virama+ {0x11070, 0x11070}, // Brahmi Sign Old Tamil Vi..Brahmi Sign Old Tamil Vi+ {0x11073, 0x11074}, // Brahmi Vowel Sign Old Ta..Brahmi Vowel Sign Old Ta+ {0x1107f, 0x11081}, // Brahmi Number Joiner ..Kaithi Sign Anusvara+ {0x110b3, 0x110b6}, // Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai+ {0x110b9, 0x110ba}, // Kaithi Sign Virama ..Kaithi Sign Nukta+ {0x110c2, 0x110c2}, // Kaithi Vowel Sign Vocali..Kaithi Vowel Sign Vocali+ {0x11100, 0x11102}, // Chakma Sign Candrabindu ..Chakma Sign Visarga+ {0x11127, 0x1112b}, // Chakma Vowel Sign A ..Chakma Vowel Sign Uu+ {0x1112d, 0x11134}, // Chakma Vowel Sign Ai ..Chakma Maayyaa+ {0x11173, 0x11173}, // Mahajani Sign Nukta ..Mahajani Sign Nukta+ {0x11180, 0x11181}, // Sharada Sign Candrabindu..Sharada Sign Anusvara+ {0x111b6, 0x111be}, // Sharada Vowel Sign U ..Sharada Vowel Sign O+ {0x111c9, 0x111cc}, // Sharada Sandhi Mark ..Sharada Extra Short Vowe+ {0x111cf, 0x111cf}, // Sharada Sign Inverted Ca..Sharada Sign Inverted Ca+ {0x1122f, 0x11231}, // Khojki Vowel Sign U ..Khojki Vowel Sign Ai+ {0x11234, 0x11234}, // Khojki Sign Anusvara ..Khojki Sign Anusvara+ {0x11236, 0x11237}, // Khojki Sign Nukta ..Khojki Sign Shadda+ {0x1123e, 0x1123e}, // Khojki Sign Sukun ..Khojki Sign Sukun+ {0x11241, 0x11241}, // (nil) ..(nil)+ {0x112df, 0x112df}, // Khudawadi Sign Anusvara ..Khudawadi Sign Anusvara+ {0x112e3, 0x112ea}, // Khudawadi Vowel Sign U ..Khudawadi Sign Virama+ {0x11300, 0x11301}, // Grantha Sign Combining A..Grantha Sign Candrabindu+ {0x1133b, 0x1133c}, // Combining Bindu Below ..Grantha Sign Nukta+ {0x11340, 0x11340}, // Grantha Vowel Sign Ii ..Grantha Vowel Sign Ii+ {0x11366, 0x1136c}, // Combining Grantha Digit ..Combining Grantha Digit+ {0x11370, 0x11374}, // Combining Grantha Letter..Combining Grantha Letter+ {0x11438, 0x1143f}, // Newa Vowel Sign U ..Newa Vowel Sign Ai+ {0x11442, 0x11444}, // Newa Sign Virama ..Newa Sign Anusvara+ {0x11446, 0x11446}, // 
Newa Sign Nukta ..Newa Sign Nukta+ {0x1145e, 0x1145e}, // Newa Sandhi Mark ..Newa Sandhi Mark+ {0x114b3, 0x114b8}, // Tirhuta Vowel Sign U ..Tirhuta Vowel Sign Vocal+ {0x114ba, 0x114ba}, // Tirhuta Vowel Sign Short..Tirhuta Vowel Sign Short+ {0x114bf, 0x114c0}, // Tirhuta Sign Candrabindu..Tirhuta Sign Anusvara+ {0x114c2, 0x114c3}, // Tirhuta Sign Virama ..Tirhuta Sign Nukta+ {0x115b2, 0x115b5}, // Siddham Vowel Sign U ..Siddham Vowel Sign Vocal+ {0x115bc, 0x115bd}, // Siddham Sign Candrabindu..Siddham Sign Anusvara+ {0x115bf, 0x115c0}, // Siddham Sign Virama ..Siddham Sign Nukta+ {0x115dc, 0x115dd}, // Siddham Vowel Sign Alter..Siddham Vowel Sign Alter+ {0x11633, 0x1163a}, // Modi Vowel Sign U ..Modi Vowel Sign Ai+ {0x1163d, 0x1163d}, // Modi Sign Anusvara ..Modi Sign Anusvara+ {0x1163f, 0x11640}, // Modi Sign Virama ..Modi Sign Ardhacandra+ {0x116ab, 0x116ab}, // Takri Sign Anusvara ..Takri Sign Anusvara+ {0x116ad, 0x116ad}, // Takri Vowel Sign Aa ..Takri Vowel Sign Aa+ {0x116b0, 0x116b5}, // Takri Vowel Sign U ..Takri Vowel Sign Au+ {0x116b7, 0x116b7}, // Takri Sign Nukta ..Takri Sign Nukta+ {0x1171d, 0x1171f}, // Ahom Consonant Sign Medi..Ahom Consonant Sign Medi+ {0x11722, 0x11725}, // Ahom Vowel Sign I ..Ahom Vowel Sign Uu+ {0x11727, 0x1172b}, // Ahom Vowel Sign Aw ..Ahom Sign Killer+ {0x1182f, 0x11837}, // Dogra Vowel Sign U ..Dogra Sign Anusvara+ {0x11839, 0x1183a}, // Dogra Sign Virama ..Dogra Sign Nukta+ {0x1193b, 0x1193c}, // Dives Akuru Sign Anusvar..Dives Akuru Sign Candrab+ {0x1193e, 0x1193e}, // Dives Akuru Virama ..Dives Akuru Virama+ {0x11943, 0x11943}, // Dives Akuru Sign Nukta ..Dives Akuru Sign Nukta+ {0x119d4, 0x119d7}, // Nandinagari Vowel Sign U..Nandinagari Vowel Sign V+ {0x119da, 0x119db}, // Nandinagari Vowel Sign E..Nandinagari Vowel Sign A+ {0x119e0, 0x119e0}, // Nandinagari Sign Virama ..Nandinagari Sign Virama+ {0x11a01, 0x11a0a}, // Zanabazar Square Vowel S..Zanabazar Square Vowel L+ {0x11a33, 0x11a38}, // Zanabazar Square Final 
C..Zanabazar Square Sign An+ {0x11a3b, 0x11a3e}, // Zanabazar Square Cluster..Zanabazar Square Cluster+ {0x11a47, 0x11a47}, // Zanabazar Square Subjoin..Zanabazar Square Subjoin+ {0x11a51, 0x11a56}, // Soyombo Vowel Sign I ..Soyombo Vowel Sign Oe+ {0x11a59, 0x11a5b}, // Soyombo Vowel Sign Vocal..Soyombo Vowel Length Mar+ {0x11a8a, 0x11a96}, // Soyombo Final Consonant ..Soyombo Sign Anusvara+ {0x11a98, 0x11a99}, // Soyombo Gemination Mark ..Soyombo Subjoiner+ {0x11c30, 0x11c36}, // Bhaiksuki Vowel Sign I ..Bhaiksuki Vowel Sign Voc+ {0x11c38, 0x11c3d}, // Bhaiksuki Vowel Sign E ..Bhaiksuki Sign Anusvara+ {0x11c3f, 0x11c3f}, // Bhaiksuki Sign Virama ..Bhaiksuki Sign Virama+ {0x11c92, 0x11ca7}, // Marchen Subjoined Letter..Marchen Subjoined Letter+ {0x11caa, 0x11cb0}, // Marchen Subjoined Letter..Marchen Vowel Sign Aa+ {0x11cb2, 0x11cb3}, // Marchen Vowel Sign U ..Marchen Vowel Sign E+ {0x11cb5, 0x11cb6}, // Marchen Sign Anusvara ..Marchen Sign Candrabindu+ {0x11d31, 0x11d36}, // Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign+ {0x11d3a, 0x11d3a}, // Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign+ {0x11d3c, 0x11d3d}, // Masaram Gondi Vowel Sign..Masaram Gondi Vowel Sign+ {0x11d3f, 0x11d45}, // Masaram Gondi Vowel Sign..Masaram Gondi Virama+ {0x11d47, 0x11d47}, // Masaram Gondi Ra-kara ..Masaram Gondi Ra-kara+ {0x11d90, 0x11d91}, // Gunjala Gondi Vowel Sign..Gunjala Gondi Vowel Sign+ {0x11d95, 0x11d95}, // Gunjala Gondi Sign Anusv..Gunjala Gondi Sign Anusv+ {0x11d97, 0x11d97}, // Gunjala Gondi Virama ..Gunjala Gondi Virama+ {0x11ef3, 0x11ef4}, // Makasar Vowel Sign I ..Makasar Vowel Sign U+ {0x11f00, 0x11f01}, // (nil) ..(nil)+ {0x11f36, 0x11f3a}, // (nil) ..(nil)+ {0x11f40, 0x11f40}, // (nil) ..(nil)+ {0x11f42, 0x11f42}, // (nil) ..(nil)+ {0x13440, 0x13440}, // (nil) ..(nil)+ {0x13447, 0x13455}, // (nil) ..(nil)+ {0x16af0, 0x16af4}, // Bassa Vah Combining High..Bassa Vah Combining High+ {0x16b30, 0x16b36}, // Pahawh Hmong Mark Cim Tu..Pahawh Hmong Mark Cim Ta+ 
{0x16f4f, 0x16f4f}, // Miao Sign Consonant Modi..Miao Sign Consonant Modi+ {0x16f8f, 0x16f92}, // Miao Tone Right ..Miao Tone Below+ {0x16fe4, 0x16fe4}, // Khitan Small Script Fill..Khitan Small Script Fill+ {0x1bc9d, 0x1bc9e}, // Duployan Thick Letter Se..Duployan Double Mark+ {0x1cf00, 0x1cf2d}, // Znamenny Combining Mark ..Znamenny Combining Mark+ {0x1cf30, 0x1cf46}, // Znamenny Combining Tonal..Znamenny Priznak Modifie+ {0x1d167, 0x1d169}, // Musical Symbol Combining..Musical Symbol Combining+ {0x1d17b, 0x1d182}, // Musical Symbol Combining..Musical Symbol Combining+ {0x1d185, 0x1d18b}, // Musical Symbol Combining..Musical Symbol Combining+ {0x1d1aa, 0x1d1ad}, // Musical Symbol Combining..Musical Symbol Combining+ {0x1d242, 0x1d244}, // Combining Greek Musical ..Combining Greek Musical+ {0x1da00, 0x1da36}, // Signwriting Head Rim ..Signwriting Air Sucking+ {0x1da3b, 0x1da6c}, // Signwriting Mouth Closed..Signwriting Excitement+ {0x1da75, 0x1da75}, // Signwriting Upper Body T..Signwriting Upper Body T+ {0x1da84, 0x1da84}, // Signwriting Location Hea..Signwriting Location Hea+ {0x1da9b, 0x1da9f}, // Signwriting Fill Modifie..Signwriting Fill Modifie+ {0x1daa1, 0x1daaf}, // Signwriting Rotation Mod..Signwriting Rotation Mod+ {0x1e000, 0x1e006}, // Combining Glagolitic Let..Combining Glagolitic Let+ {0x1e008, 0x1e018}, // Combining Glagolitic Let..Combining Glagolitic Let+ {0x1e01b, 0x1e021}, // Combining Glagolitic Let..Combining Glagolitic Let+ {0x1e023, 0x1e024}, // Combining Glagolitic Let..Combining Glagolitic Let+ {0x1e026, 0x1e02a}, // Combining Glagolitic Let..Combining Glagolitic Let+ {0x1e08f, 0x1e08f}, // (nil) ..(nil)+ {0x1e130, 0x1e136}, // Nyiakeng Puachue Hmong T..Nyiakeng Puachue Hmong T+ {0x1e2ae, 0x1e2ae}, // Toto Sign Rising Tone ..Toto Sign Rising Tone+ {0x1e2ec, 0x1e2ef}, // Wancho Tone Tup ..Wancho Tone Koini+ {0x1e4ec, 0x1e4ef}, // (nil) ..(nil)+ {0x1e8d0, 0x1e8d6}, // Mende Kikakui Combining ..Mende Kikakui Combining+ {0x1e944, 0x1e94a}, // 
Adlam Alif Lengthener ..Adlam Nukta+ {0xe0100, 0xe01ef}, // Variation Selector-17 ..Variation Selector-256};
// https://github.com/jquast/wcwidth/blob/master/wcwidth/table_wide.py
@@ -375,168 +375,175 @@
// from https://github.com/jquast/wcwidth/pull/64
// at commit 1b9b6585b0080ea5cb88dc9815796505724793fe (2022-12-16):
static struct width_interval WIDE_EASTASIAN[] = {- {0x01100, 0x0115f}, // Hangul Choseong Kiyeok ..Hangul Choseong Filler- {0x0231a, 0x0231b}, // Watch ..Hourglass- {0x02329, 0x0232a}, // Left-pointing Angle Brac..Right-pointing Angle Bra- {0x023e9, 0x023ec}, // Black Right-pointing Dou..Black Down-pointing Doub- {0x023f0, 0x023f0}, // Alarm Clock ..Alarm Clock- {0x023f3, 0x023f3}, // Hourglass With Flowing S..Hourglass With Flowing S- {0x025fd, 0x025fe}, // White Medium Small Squar..Black Medium Small Squar- {0x02614, 0x02615}, // Umbrella With Rain Drops..Hot Beverage- {0x02648, 0x02653}, // Aries ..Pisces- {0x0267f, 0x0267f}, // Wheelchair Symbol ..Wheelchair Symbol- {0x02693, 0x02693}, // Anchor ..Anchor- {0x026a1, 0x026a1}, // High Voltage Sign ..High Voltage Sign- {0x026aa, 0x026ab}, // Medium White Circle ..Medium Black Circle- {0x026bd, 0x026be}, // Soccer Ball ..Baseball- {0x026c4, 0x026c5}, // Snowman Without Snow ..Sun Behind Cloud- {0x026ce, 0x026ce}, // Ophiuchus ..Ophiuchus- {0x026d4, 0x026d4}, // No Entry ..No Entry- {0x026ea, 0x026ea}, // Church ..Church- {0x026f2, 0x026f3}, // Fountain ..Flag In Hole- {0x026f5, 0x026f5}, // Sailboat ..Sailboat- {0x026fa, 0x026fa}, // Tent ..Tent- {0x026fd, 0x026fd}, // Fuel Pump ..Fuel Pump- {0x02705, 0x02705}, // White Heavy Check Mark ..White Heavy Check Mark- {0x0270a, 0x0270b}, // Raised Fist ..Raised Hand- {0x02728, 0x02728}, // Sparkles ..Sparkles- {0x0274c, 0x0274c}, // Cross Mark ..Cross Mark- {0x0274e, 0x0274e}, // Negative Squared Cross M..Negative Squared Cross M- {0x02753, 0x02755}, // Black Question Mark Orna..White Exclamation Mark O- {0x02757, 0x02757}, // Heavy Exclamation Mark S..Heavy Exclamation Mark S- {0x02795, 0x02797}, // Heavy Plus Sign ..Heavy Division Sign- {0x027b0, 0x027b0}, // Curly Loop ..Curly Loop- {0x027bf, 0x027bf}, // Double Curly Loop ..Double Curly Loop- {0x02b1b, 0x02b1c}, // Black Large Square ..White Large Square- {0x02b50, 0x02b50}, // White Medium Star ..White Medium Star- 
{0x02b55, 0x02b55}, // Heavy Large Circle ..Heavy Large Circle- {0x02e80, 0x02e99}, // Cjk Radical Repeat ..Cjk Radical Rap- {0x02e9b, 0x02ef3}, // Cjk Radical Choke ..Cjk Radical C-simplified- {0x02f00, 0x02fd5}, // Kangxi Radical One ..Kangxi Radical Flute- {0x02ff0, 0x02ffb}, // Ideographic Description ..Ideographic Description- {0x03000, 0x0303e}, // Ideographic Space ..Ideographic Variation In- {0x03041, 0x03096}, // Hiragana Letter Small A ..Hiragana Letter Small Ke- {0x03099, 0x030ff}, // Combining Katakana-hirag..Katakana Digraph Koto- {0x03105, 0x0312f}, // Bopomofo Letter B ..Bopomofo Letter Nn- {0x03131, 0x0318e}, // Hangul Letter Kiyeok ..Hangul Letter Araeae- {0x03190, 0x031e3}, // Ideographic Annotation L..Cjk Stroke Q- {0x031f0, 0x0321e}, // Katakana Letter Small Ku..Parenthesized Korean Cha- {0x03220, 0x03247}, // Parenthesized Ideograph ..Circled Ideograph Koto- {0x03250, 0x04dbf}, // Partnership Sign ..Cjk Unified Ideograph-4d- {0x04e00, 0x0a48c}, // Cjk Unified Ideograph-4e..Yi Syllable Yyr- {0x0a490, 0x0a4c6}, // Yi Radical Qot ..Yi Radical Ke- {0x0a960, 0x0a97c}, // Hangul Choseong Tikeut-m..Hangul Choseong Ssangyeo- {0x0ac00, 0x0d7a3}, // Hangul Syllable Ga ..Hangul Syllable Hih- {0x0f900, 0x0faff}, // Cjk Compatibility Ideogr..(nil)- {0x0fe10, 0x0fe19}, // Presentation Form For Ve..Presentation Form For Ve- {0x0fe30, 0x0fe52}, // Presentation Form For Ve..Small Full Stop- {0x0fe54, 0x0fe66}, // Small Semicolon ..Small Equals Sign- {0x0fe68, 0x0fe6b}, // Small Reverse Solidus ..Small Commercial At- {0x0ff01, 0x0ff60}, // Fullwidth Exclamation Ma..Fullwidth Right White Pa- {0x0ffe0, 0x0ffe6}, // Fullwidth Cent Sign ..Fullwidth Won Sign- {0x16fe0, 0x16fe4}, // Tangut Iteration Mark ..Khitan Small Script Fill- {0x16ff0, 0x16ff1}, // Vietnamese Alternate Rea..Vietnamese Alternate Rea- {0x17000, 0x187f7}, // (nil) ..(nil)- {0x18800, 0x18cd5}, // Tangut Component-001 ..Khitan Small Script Char- {0x18d00, 0x18d08}, // (nil) ..(nil)- {0x1aff0, 
0x1aff3}, // Katakana Letter Minnan T..Katakana Letter Minnan T- {0x1aff5, 0x1affb}, // Katakana Letter Minnan T..Katakana Letter Minnan N- {0x1affd, 0x1affe}, // Katakana Letter Minnan N..Katakana Letter Minnan N- {0x1b000, 0x1b122}, // Katakana Letter Archaic ..Katakana Letter Archaic- {0x1b132, 0x1b132}, // (nil) ..(nil)- {0x1b150, 0x1b152}, // Hiragana Letter Small Wi..Hiragana Letter Small Wo- {0x1b155, 0x1b155}, // (nil) ..(nil)- {0x1b164, 0x1b167}, // Katakana Letter Small Wi..Katakana Letter Small N- {0x1b170, 0x1b2fb}, // Nushu Character-1b170 ..Nushu Character-1b2fb- {0x1f004, 0x1f004}, // Mahjong Tile Red Dragon ..Mahjong Tile Red Dragon- {0x1f0cf, 0x1f0cf}, // Playing Card Black Joker..Playing Card Black Joker- {0x1f18e, 0x1f18e}, // Negative Squared Ab ..Negative Squared Ab- {0x1f191, 0x1f19a}, // Squared Cl ..Squared Vs- {0x1f200, 0x1f202}, // Square Hiragana Hoka ..Squared Katakana Sa- {0x1f210, 0x1f23b}, // Squared Cjk Unified Ideo..Squared Cjk Unified Ideo- {0x1f240, 0x1f248}, // Tortoise Shell Bracketed..Tortoise Shell Bracketed- {0x1f250, 0x1f251}, // Circled Ideograph Advant..Circled Ideograph Accept- {0x1f260, 0x1f265}, // Rounded Symbol For Fu ..Rounded Symbol For Cai- {0x1f300, 0x1f320}, // Cyclone ..Shooting Star- {0x1f32d, 0x1f335}, // Hot Dog ..Cactus- {0x1f337, 0x1f37c}, // Tulip ..Baby Bottle- {0x1f37e, 0x1f393}, // Bottle With Popping Cork..Graduation Cap- {0x1f3a0, 0x1f3ca}, // Carousel Horse ..Swimmer- {0x1f3cf, 0x1f3d3}, // Cricket Bat And Ball ..Table Tennis Paddle And- {0x1f3e0, 0x1f3f0}, // House Building ..European Castle- {0x1f3f4, 0x1f3f4}, // Waving Black Flag ..Waving Black Flag- {0x1f3f8, 0x1f43e}, // Badminton Racquet And Sh..Paw Prints- {0x1f440, 0x1f440}, // Eyes ..Eyes- {0x1f442, 0x1f4fc}, // Ear ..Videocassette- {0x1f4ff, 0x1f53d}, // Prayer Beads ..Down-pointing Small Red- {0x1f54b, 0x1f54e}, // Kaaba ..Menorah With Nine Branch- {0x1f550, 0x1f567}, // Clock Face One Oclock ..Clock Face Twelve-thirty- {0x1f57a, 
0x1f57a}, // Man Dancing ..Man Dancing- {0x1f595, 0x1f596}, // Reversed Hand With Middl..Raised Hand With Part Be- {0x1f5a4, 0x1f5a4}, // Black Heart ..Black Heart- {0x1f5fb, 0x1f64f}, // Mount Fuji ..Person With Folded Hands- {0x1f680, 0x1f6c5}, // Rocket ..Left Luggage- {0x1f6cc, 0x1f6cc}, // Sleeping Accommodation ..Sleeping Accommodation- {0x1f6d0, 0x1f6d2}, // Place Of Worship ..Shopping Trolley- {0x1f6d5, 0x1f6d7}, // Hindu Temple ..Elevator- {0x1f6dc, 0x1f6df}, // (nil) ..Ring Buoy- {0x1f6eb, 0x1f6ec}, // Airplane Departure ..Airplane Arriving- {0x1f6f4, 0x1f6fc}, // Scooter ..Roller Skate- {0x1f7e0, 0x1f7eb}, // Large Orange Circle ..Large Brown Square- {0x1f7f0, 0x1f7f0}, // Heavy Equals Sign ..Heavy Equals Sign- {0x1f90c, 0x1f93a}, // Pinched Fingers ..Fencer- {0x1f93c, 0x1f945}, // Wrestlers ..Goal Net- {0x1f947, 0x1f9ff}, // First Place Medal ..Nazar Amulet- {0x1fa70, 0x1fa7c}, // Ballet Shoes ..Crutch- {0x1fa80, 0x1fa88}, // Yo-yo ..(nil)- {0x1fa90, 0x1fabd}, // Ringed Planet ..(nil)- {0x1fabf, 0x1fac5}, // (nil) ..Person With Crown- {0x1face, 0x1fadb}, // (nil) ..(nil)- {0x1fae0, 0x1fae8}, // Melting Face ..(nil)- {0x1faf0, 0x1faf8}, // Hand With Index Finger A..(nil)- {0x20000, 0x2fffd}, // Cjk Unified Ideograph-20..(nil)- {0x30000, 0x3fffd}, // Cjk Unified Ideograph-30..(nil)+ {0x01100, 0x0115f}, // Hangul Choseong Kiyeok ..Hangul Choseong Filler+ {0x0231a, 0x0231b}, // Watch ..Hourglass+ {0x02329, 0x0232a}, // Left-pointing Angle Brac..Right-pointing Angle Bra+ {0x023e9, 0x023ec}, // Black Right-pointing Dou..Black Down-pointing Doub+ {0x023f0, 0x023f0}, // Alarm Clock ..Alarm Clock+ {0x023f3, 0x023f3}, // Hourglass With Flowing S..Hourglass With Flowing S+ {0x025fd, 0x025fe}, // White Medium Small Squar..Black Medium Small Squar+ {0x02614, 0x02615}, // Umbrella With Rain Drops..Hot Beverage+ {0x02648, 0x02653}, // Aries ..Pisces+ {0x0267f, 0x0267f}, // Wheelchair Symbol ..Wheelchair Symbol+ {0x02693, 0x02693}, // Anchor ..Anchor+ {0x026a1, 
0x026a1}, // High Voltage Sign ..High Voltage Sign+ {0x026aa, 0x026ab}, // Medium White Circle ..Medium Black Circle+ {0x026bd, 0x026be}, // Soccer Ball ..Baseball+ {0x026c4, 0x026c5}, // Snowman Without Snow ..Sun Behind Cloud+ {0x026ce, 0x026ce}, // Ophiuchus ..Ophiuchus+ {0x026d4, 0x026d4}, // No Entry ..No Entry+ {0x026ea, 0x026ea}, // Church ..Church+ {0x026f2, 0x026f3}, // Fountain ..Flag In Hole+ {0x026f5, 0x026f5}, // Sailboat ..Sailboat+ {0x026fa, 0x026fa}, // Tent ..Tent+ {0x026fd, 0x026fd}, // Fuel Pump ..Fuel Pump+ {0x02705, 0x02705}, // White Heavy Check Mark ..White Heavy Check Mark+ {0x0270a, 0x0270b}, // Raised Fist ..Raised Hand+ {0x02728, 0x02728}, // Sparkles ..Sparkles+ {0x0274c, 0x0274c}, // Cross Mark ..Cross Mark+ {0x0274e, 0x0274e}, // Negative Squared Cross M..Negative Squared Cross M+ {0x02753, 0x02755}, // Black Question Mark Orna..White Exclamation Mark O+ {0x02757, 0x02757}, // Heavy Exclamation Mark S..Heavy Exclamation Mark S+ {0x02795, 0x02797}, // Heavy Plus Sign ..Heavy Division Sign+ {0x027b0, 0x027b0}, // Curly Loop ..Curly Loop+ {0x027bf, 0x027bf}, // Double Curly Loop ..Double Curly Loop+ {0x02b1b, 0x02b1c}, // Black Large Square ..White Large Square+ {0x02b50, 0x02b50}, // White Medium Star ..White Medium Star+ {0x02b55, 0x02b55}, // Heavy Large Circle ..Heavy Large Circle+ {0x02e80, 0x02e99}, // Cjk Radical Repeat ..Cjk Radical Rap+ {0x02e9b, 0x02ef3}, // Cjk Radical Choke ..Cjk Radical C-simplified+ {0x02f00, 0x02fd5}, // Kangxi Radical One ..Kangxi Radical Flute+ {0x02ff0, 0x02ffb}, // Ideographic Description ..Ideographic Description+ {0x03000, 0x0303e}, // Ideographic Space ..Ideographic Variation In+ {0x03041, 0x03096}, // Hiragana Letter Small A ..Hiragana Letter Small Ke+ {0x03099, 0x030ff}, // Combining Katakana-hirag..Katakana Digraph Koto+ {0x03105, 0x0312f}, // Bopomofo Letter B ..Bopomofo Letter Nn+ {0x03131, 0x0318e}, // Hangul Letter Kiyeok ..Hangul Letter Araeae+ {0x03190, 0x031e3}, // Ideographic Annotation 
L..Cjk Stroke Q+ {0x031f0, 0x0321e}, // Katakana Letter Small Ku..Parenthesized Korean Cha+ {0x03220, 0x03247}, // Parenthesized Ideograph ..Circled Ideograph Koto+ {0x03250, 0x04dbf}, // Partnership Sign ..Cjk Unified Ideograph-4d+ {0x04e00, 0x0a48c}, // Cjk Unified Ideograph-4e..Yi Syllable Yyr+ {0x0a490, 0x0a4c6}, // Yi Radical Qot ..Yi Radical Ke+ {0x0a960, 0x0a97c}, // Hangul Choseong Tikeut-m..Hangul Choseong Ssangyeo+ {0x0ac00, 0x0d7a3}, // Hangul Syllable Ga ..Hangul Syllable Hih+ {0x0f900, 0x0faff}, // Cjk Compatibility Ideogr..(nil)+ {0x0fe10, 0x0fe19}, // Presentation Form For Ve..Presentation Form For Ve+ {0x0fe30, 0x0fe52}, // Presentation Form For Ve..Small Full Stop+ {0x0fe54, 0x0fe66}, // Small Semicolon ..Small Equals Sign+ {0x0fe68, 0x0fe6b}, // Small Reverse Solidus ..Small Commercial At+ {0x0ff01, 0x0ff60}, // Fullwidth Exclamation Ma..Fullwidth Right White Pa+ {0x0ffe0, 0x0ffe6}, // Fullwidth Cent Sign ..Fullwidth Won Sign+ {0x16fe0, 0x16fe4}, // Tangut Iteration Mark ..Khitan Small Script Fill+ {0x16ff0, 0x16ff1}, // Vietnamese Alternate Rea..Vietnamese Alternate Rea+ {0x17000, 0x187f7}, // (nil) ..(nil)+ {0x18800, 0x18cd5}, // Tangut Component-001 ..Khitan Small Script Char+ {0x18d00, 0x18d08}, // (nil) ..(nil)+ {0x1aff0, 0x1aff3}, // Katakana Letter Minnan T..Katakana Letter Minnan T+ {0x1aff5, 0x1affb}, // Katakana Letter Minnan T..Katakana Letter Minnan N+ {0x1affd, 0x1affe}, // Katakana Letter Minnan N..Katakana Letter Minnan N+ {0x1b000, 0x1b122}, // Katakana Letter Archaic ..Katakana Letter Archaic+ {0x1b132, 0x1b132}, // (nil) ..(nil)+ {0x1b150, 0x1b152}, // Hiragana Letter Small Wi..Hiragana Letter Small Wo+ {0x1b155, 0x1b155}, // (nil) ..(nil)+ {0x1b164, 0x1b167}, // Katakana Letter Small Wi..Katakana Letter Small N+ {0x1b170, 0x1b2fb}, // Nushu Character-1b170 ..Nushu Character-1b2fb+ {0x1f004, 0x1f004}, // Mahjong Tile Red Dragon ..Mahjong Tile Red Dragon+ {0x1f0cf, 0x1f0cf}, // Playing Card Black Joker..Playing Card Black Joker+ 
{0x1f18e, 0x1f18e}, // Negative Squared Ab ..Negative Squared Ab+ {0x1f191, 0x1f19a}, // Squared Cl ..Squared Vs+ {0x1f200, 0x1f202}, // Square Hiragana Hoka ..Squared Katakana Sa+ {0x1f210, 0x1f23b}, // Squared Cjk Unified Ideo..Squared Cjk Unified Ideo+ {0x1f240, 0x1f248}, // Tortoise Shell Bracketed..Tortoise Shell Bracketed+ {0x1f250, 0x1f251}, // Circled Ideograph Advant..Circled Ideograph Accept+ {0x1f260, 0x1f265}, // Rounded Symbol For Fu ..Rounded Symbol For Cai+ {0x1f300, 0x1f320}, // Cyclone ..Shooting Star+ {0x1f32d, 0x1f335}, // Hot Dog ..Cactus+ {0x1f337, 0x1f37c}, // Tulip ..Baby Bottle+ {0x1f37e, 0x1f393}, // Bottle With Popping Cork..Graduation Cap+ {0x1f3a0, 0x1f3ca}, // Carousel Horse ..Swimmer+ {0x1f3cf, 0x1f3d3}, // Cricket Bat And Ball ..Table Tennis Paddle And+ {0x1f3e0, 0x1f3f0}, // House Building ..European Castle+ {0x1f3f4, 0x1f3f4}, // Waving Black Flag ..Waving Black Flag+ {0x1f3f8, 0x1f43e}, // Badminton Racquet And Sh..Paw Prints+ {0x1f440, 0x1f440}, // Eyes ..Eyes+ {0x1f442, 0x1f4fc}, // Ear ..Videocassette+ {0x1f4ff, 0x1f53d}, // Prayer Beads ..Down-pointing Small Red+ {0x1f54b, 0x1f54e}, // Kaaba ..Menorah With Nine Branch+ {0x1f550, 0x1f567}, // Clock Face One Oclock ..Clock Face Twelve-thirty+ {0x1f57a, 0x1f57a}, // Man Dancing ..Man Dancing+ {0x1f595, 0x1f596}, // Reversed Hand With Middl..Raised Hand With Part Be+ {0x1f5a4, 0x1f5a4}, // Black Heart ..Black Heart+ {0x1f5fb, 0x1f64f}, // Mount Fuji ..Person With Folded Hands+ {0x1f680, 0x1f6c5}, // Rocket ..Left Luggage+ {0x1f6cc, 0x1f6cc}, // Sleeping Accommodation ..Sleeping Accommodation+ {0x1f6d0, 0x1f6d2}, // Place Of Worship ..Shopping Trolley+ {0x1f6d5, 0x1f6d7}, // Hindu Temple ..Elevator+ {0x1f6dc, 0x1f6df}, // (nil) ..Ring Buoy+ {0x1f6eb, 0x1f6ec}, // Airplane Departure ..Airplane Arriving+ {0x1f6f4, 0x1f6fc}, // Scooter ..Roller Skate+ {0x1f7e0, 0x1f7eb}, // Large Orange Circle ..Large Brown Square+ {0x1f7f0, 0x1f7f0}, // Heavy Equals Sign ..Heavy Equals Sign+ {0x1f90c, 
0x1f93a}, // Pinched Fingers ..Fencer+ {0x1f93c, 0x1f945}, // Wrestlers ..Goal Net+ {0x1f947, 0x1f9ff}, // First Place Medal ..Nazar Amulet+ {0x1fa70, 0x1fa7c}, // Ballet Shoes ..Crutch+ {0x1fa80, 0x1fa88}, // Yo-yo ..(nil)+ {0x1fa90, 0x1fabd}, // Ringed Planet ..(nil)+ {0x1fabf, 0x1fac5}, // (nil) ..Person With Crown+ {0x1face, 0x1fadb}, // (nil) ..(nil)+ {0x1fae0, 0x1fae8}, // Melting Face ..(nil)+ {0x1faf0, 0x1faf8}, // Hand With Index Finger A..(nil)+ {0x20000, 0x2fffd}, // Cjk Unified Ideograph-20..(nil)+ {0x30000, 0x3fffd}, // Cjk Unified Ideograph-30..(nil)};
-static bool intable(struct width_interval* table, int table_length, int c) {- // First quick check for Latin1 etc. characters.
- if (c < table[0].start) return false;
+static bool
+intable(struct width_interval* table, int table_length, int c)
+{+ // First quick check for Latin1 etc. characters.
+ if(c < table[0].start)
+ return false;
- // Binary search in table.
- int bot = 0;
- int top = table_length - 1;
- while (top >= bot) {- int mid = (bot + top) / 2;
- if (table[mid].end < c) {- bot = mid + 1;
- } else if (table[mid].start > c) {- top = mid - 1;
- } else {- return true;
- }
- }
- return false;
-}
+ // Binary search in table.
+ int bot = 0;
+ int top = table_length - 1;
+ while(top >= bot){+ int mid = (bot + top) / 2;
+ if(table[mid].end < c){+ bot = mid + 1;
+ }else if (table[mid].start > c){+ top = mid - 1;
+ }else{+ return true;
+ }
+ }
+ return false;
+}
-int wcwidth(wchar_t ucs) {+int
+wcwidth(Rune ucs)
+{// NOTE: created by hand, there isn't anything identifiable other than
// general Cf category code to identify these, and some characters in Cf
// category code are of non-zero width.
- if (ucs == 0 ||
- ucs == 0x034F ||
- (0x200B <= ucs && ucs <= 0x200F) ||
- ucs == 0x2028 ||
- ucs == 0x2029 ||
- (0x202A <= ucs && ucs <= 0x202E) ||
- (0x2060 <= ucs && ucs <= 0x2063)) {- return 0;
- }
+ if(ucs == 0 ||
+ ucs == 0x034F ||
+ (0x200B <= ucs && ucs <= 0x200F) ||
+ ucs == 0x2028 ||
+ ucs == 0x2029 ||
+ (0x202A <= ucs && ucs <= 0x202E) ||
+ (0x2060 <= ucs && ucs <= 0x2063)){+ return 0;
+ }
- // C0/C1 control characters.
- if (ucs < 32 || (0x07F <= ucs && ucs < 0x0A0)) return -1;
+ // C0/C1 control characters.
+ if(ucs < 32 || (0x07F <= ucs && ucs < 0x0A0))
+ return -1;
- // Combining characters with zero width.
- if (intable(ZERO_WIDTH, sizeof(ZERO_WIDTH)/sizeof(struct width_interval), ucs)) return 0;
+ // Combining characters with zero width.
+ if(intable(ZERO_WIDTH, sizeof(ZERO_WIDTH)/sizeof(struct width_interval), ucs))
+ return 0;
- return intable(WIDE_EASTASIAN, sizeof(WIDE_EASTASIAN)/sizeof(struct width_interval), ucs) ? 2 : 1;
+ return intable(WIDE_EASTASIAN, sizeof(WIDE_EASTASIAN)/sizeof(struct width_interval), ucs) ? 2 : 1;
}
--- a/aliases.scm
+++ b/aliases.scm
@@ -62,7 +62,7 @@
(define (infinite? x) (or (equal? x +inf.0) (equal? x -inf.0)))
(define (char->integer c) (fixnum c))
-(define (integer->char i) (wchar i))
+(define (integer->char i) (rune i))
(define char-upcase char.upcase)
(define char-downcase char.downcase)
(define char=? eqv?)
--- a/builtins.c
+++ b/builtins.c
@@ -94,8 +94,8 @@
cv = (cvalue_t*)ptr(a);
if(cp_class(cv) == bytetype)
return fixnum(1);
- if(cp_class(cv) == wchartype)
- return fixnum(u8_charlen(*(uint32_t*)cp_data(cv)));
+ if(cp_class(cv) == runetype)
+ return fixnum(runelen(*(Rune*)cp_data(cv)));
}
if(iscvalue(a) && cv_class(ptr(a))->eltype != nil)
return size_wrap(cvalue_arraylen(a));
--- a/cvalues.c
+++ b/cvalues.c
@@ -10,9 +10,9 @@
value_t int8sym, uint8sym, int16sym, uint16sym, int32sym, uint32sym;
value_t int64sym, uint64sym, bignumsym;
-value_t longsym, ulongsym, bytesym, wcharsym;
+value_t longsym, ulongsym, bytesym, runesym;
value_t floatsym, doublesym;
-value_t gftypesym, stringtypesym, wcstringtypesym;
+value_t gftypesym, stringtypesym, runestringtypesym;
value_t emptystringsym;
value_t structsym, arraysym, enumsym, cfunctionsym, voidsym, pointersym;
@@ -27,8 +27,8 @@
static fltype_t *int64type, *uint64type;
static fltype_t *longtype, *ulongtype;
static fltype_t *floattype, *doubletype;
-fltype_t *bytetype, *wchartype;
-fltype_t *stringtype, *wcstringtype;
+fltype_t *bytetype, *runetype;
+fltype_t *stringtype, *runestringtype;
fltype_t *builtintype;
static size_t malloc_pressure = 0;
@@ -307,7 +307,6 @@
num_ctor(int64, int64_t, T_INT64)
num_ctor(uint64, uint64_t, T_UINT64)
num_ctor(byte, uint8_t, T_UINT8)
-num_ctor(wchar, int32_t, T_INT32)
#if defined(ULONG64)
num_ctor(long, int64_t, T_INT64)
num_ctor(ulong, uint64_t, T_UINT64)
@@ -317,6 +316,7 @@
#endif
num_ctor(float, float, T_FLOAT)
num_ctor(double, double, T_DOUBLE)
+num_ctor(rune, uint32_t, T_UINT32)
static int
cvalue_mpint_init(fltype_t *type, value_t arg, void *dest)
@@ -1622,7 +1622,6 @@
ctor_cv_intern(int64, T_INT64, int64_t);
ctor_cv_intern(uint64, T_UINT64, uint64_t);
ctor_cv_intern(byte, T_UINT8, uint8_t);
- ctor_cv_intern(wchar, T_INT32, int32_t);
#if defined(ULONG64)
ctor_cv_intern(long, T_INT64, int64_t);
ctor_cv_intern(ulong, T_UINT64, uint64_t);
@@ -1630,6 +1629,7 @@
ctor_cv_intern(long, T_INT32, int32_t);
ctor_cv_intern(ulong, T_UINT32, uint32_t);
#endif
+ ctor_cv_intern(rune, T_UINT32, uint32_t);
ctor_cv_intern(float, T_FLOAT, float);
ctor_cv_intern(double, T_DOUBLE, double);
@@ -1644,8 +1644,8 @@
stringtypesym = symbol("*string-type*");setc(stringtypesym, fl_list2(arraysym, bytesym));
- wcstringtypesym = symbol("*wcstring-type*");- setc(wcstringtypesym, fl_list2(arraysym, wcharsym));
+ runestringtypesym = symbol("*runestring-type*");+ setc(runestringtypesym, fl_list2(arraysym, runesym));
mk_primtype(int8, int8_t);
mk_primtype(uint8, uint8_t);
@@ -1663,7 +1663,7 @@
mk_primtype(ulong, uint32_t);
#endif
mk_primtype(byte, uint8_t);
- mk_primtype(wchar, int32_t);
+ mk_primtype(rune, uint32_t);
mk_primtype(float, float);
mk_primtype(double, double);
@@ -1673,7 +1673,7 @@
mpinttype->vtable = &mpint_vtable;
stringtype = get_type(symbol_value(stringtypesym));
- wcstringtype = get_type(symbol_value(wcstringtypesym));
+ runestringtype = get_type(symbol_value(runestringtypesym));
emptystringsym = symbol("*empty-string*"); setc(emptystringsym, cvalue_static_cstring(""));--- a/cvalues.h
+++ b/cvalues.h
@@ -17,13 +17,13 @@
extern value_t int8sym, uint8sym, int16sym, uint16sym, int32sym, uint32sym;
extern value_t int64sym, uint64sym, bignumsym;
-extern value_t longsym, ulongsym, bytesym, wcharsym;
+extern value_t longsym, ulongsym, bytesym, runesym;
extern value_t structsym, arraysym, enumsym, cfunctionsym, voidsym, pointersym;
-extern value_t stringtypesym, wcstringtypesym, emptystringsym;
+extern value_t stringtypesym, runestringtypesym, emptystringsym;
extern value_t unionsym, floatsym, doublesym;
-extern fltype_t *bytetype, *wchartype;
-extern fltype_t *stringtype, *wcstringtype;
+extern fltype_t *bytetype, *runetype;
+extern fltype_t *stringtype, *runestringtype;
extern fltype_t *builtintype;
extern htable_t TypeTable;
@@ -76,7 +76,7 @@
value_t mk_uint32(uint32_t n);
value_t mk_int64(int64_t n);
value_t mk_uint64(uint64_t n);
-value_t mk_wchar(int32_t n);
+value_t mk_rune(Rune n);
/* builtins.c */
size_t llength(value_t v);
--- a/equal.c
+++ b/equal.c
@@ -85,7 +85,7 @@
if(isfixnum(b))
return (numval(a) < numval(b)) ? fixnum(-1) : fixnum(1);
if(iscprim(b)){- if(cp_class((cprim_t*)ptr(b)) == wchartype)
+ if(cp_class((cprim_t*)ptr(b)) == runetype)
return fixnum(1);
return fixnum(numeric_compare(a, b, eq, 1, 0));
}
@@ -106,10 +106,10 @@
return bounded_vector_compare(a, b, bound, eq);
break;
case TAG_CPRIM:
- if(cp_class((cprim_t*)ptr(a)) == wchartype){- if(!iscprim(b) || cp_class(ptr(b)) != wchartype)
+ if(cp_class((cprim_t*)ptr(a)) == runetype){+ if(!iscprim(b) || cp_class(ptr(b)) != runetype)
return fixnum(-1);
- }else if(iscprim(b) && cp_class(ptr(b)) == wchartype)
+ }else if(iscprim(b) && cp_class(ptr(b)) == runetype)
return fixnum(1);
c = numeric_compare(a, b, eq, 1, 0);
if(c != 2)
@@ -366,8 +366,8 @@
case TAG_CPRIM:
cp = ptr(a);
data = cp_data(cp);
- if(cp_class(cp) == wchartype)
- return inthash(*(int32_t*)data);
+ if(cp_class(cp) == runetype)
+ return inthash(*(Rune*)data);
nt = cp_numtype(cp);
u.d = conv_to_double(data, nt);
return doublehash(u.i64);
--- a/flisp.boot
+++ b/flisp.boot
@@ -56,7 +56,7 @@
length=) 1arg-lambda?)
<= #fn("7000n210L;IB0470051;380470151S:" #(nan?) <=) > #fn("7000n210L:" #() >) >= #fn("7000n201L;IB0470051;380470151S:" #(nan?) >=)- Instructions #table(brne 19 vargc 76 load1 27 = 60 setc.l 75 sub2 80 brne.l 85 largc 81 brnn 26 loadc.l 70 loadi8 66 < 28 nop 46 set-cdr! 30 loada 8 neg 37 bound? 42 / 58 brn.l 88 lvargc 82 brt 25 trycatch 77 null? 38 load0 21 jmp.l 48 loadv 2 seta 15 keyargs 91 * 57 function? 44 builtin? 43 aref 23 optargs 89 loadt 20 vector? 45 cdr 13 brf 3 loadc00 17 symbol? 34 cadr 36 pop 4 pair? 18 for 78 closure 14 loadf 31 compare 61 loadv.l 67 setg.l 72 brn 87 eqv? 51 aset! 64 atom? 24 eq? 33 boolean? 39 brt.l 50 tapply 79 dummy_nil 94 loada0 0 brbound 90 dup 11 loadc01 22 list 53 loadc 9 apply 54 dummy_t 93 setg 71 loada1 1 tcall.l 84 jmp 16 fixnum? 41 cons 32 loadg.l 68 tcall 6 dummy_eof 95 call 5 - 56 brf.l 49 + 55 dummy_f 92 add2 29 seta.l 73 loadnil 65 brnn.l 86 setc 74 set-car! 47 loadg 7 vector 63 loada.l 69 argc 62 div0 59 ret 10 car 12 number? 40 equal? 52 call.l 83 not 35)
+ Instructions #table(not 35 vargc 76 load1 27 = 60 setc.l 75 sub2 80 brne.l 85 largc 81 brnn 26 loadc.l 70 loadi8 66 < 28 nop 46 set-cdr! 30 loada 8 neg 37 bound? 42 / 58 brn.l 88 lvargc 82 brt 25 trycatch 77 null? 38 load0 21 jmp.l 48 loadv 2 seta 15 keyargs 91 * 57 function? 44 builtin? 43 aref 23 optargs 89 loadt 20 vector? 45 cdr 13 brf 3 loadc00 17 symbol? 34 cadr 36 pop 4 pair? 18 for 78 closure 14 loadf 31 compare 61 loadv.l 67 setg.l 72 brn 87 eqv? 51 aset! 64 atom? 24 eq? 33 boolean? 39 brt.l 50 tapply 79 dummy_nil 94 loada0 0 brbound 90 dup 11 loadc01 22 list 53 loadc 9 apply 54 dummy_t 93 setg 71 loada1 1 tcall.l 84 jmp 16 fixnum? 41 cons 32 loadg.l 68 tcall 6 dummy_eof 95 call 5 - 56 brf.l 49 + 55 dummy_f 92 add2 29 seta.l 73 loadnil 65 brnn.l 86 setc 74 set-car! 47 loadg 7 vector 63 loada.l 69 argc 62 div0 59 ret 10 car 12 number? 40 equal? 52 call.l 83 brne 19)
__init_globals #fn("6000n020w1422w3474w5476w7478w9:" #("/"*directory-separator*
"\n"
@@ -128,7 +128,7 @@
#fn("6000n10<===:" #() cdddar) cddddr #fn("6000n10====:" #() cddddr) cdddr #fn("6000n10===:" #() cdddr) cddr #fn("6000n10==:" #() cddr) char? #fn("7000n12005121Q:" #(#fn(typeof)- wchar) char?)
+ rune) char?)
closure? #fn("7000n10\\;36040[S:" #() closure?) compile #fn("8000n170q062:" #(compile-f) compile) compile-and #fn("<000n470018283D2166:" #(compile-short-circuitbrf) compile-and)
--- a/flisp.c
+++ b/flisp.c
@@ -714,7 +714,7 @@
return 1;
if(iscprim(v)){cprim_t *c = ptr(v);
- return c->type != wchartype;
+ return c->type != runetype;
}
if(iscvalue(v)){cvalue_t *c = ptr(v);
--- a/ios.c
+++ b/ios.c
@@ -844,77 +844,29 @@
}
int
-ios_getutf8(ios_t *s, uint32_t *pwc)
+ios_getutf8(ios_t *s, Rune *r)
{- int c;
- size_t sz;
- char c0;
- char buf[8];
+ int c, i;
+ char buf[UTFmax];
- c = ios_peekc(s);
- if(c == IOS_EOF){- s->_eof = 1;
- return IOS_EOF;
+ for(i = 0; i < sizeof(buf); i++){+ if((c = ios_getc(s)) == IOS_EOF){+ s->_eof = 1;
+ return IOS_EOF;
+ }
+ buf[i] = c;
+ if(fullrune(buf, i+1))
+ break;
}
- c0 = (char)c;
- if((uint8_t)c0 < 0x80){- ios_getc(s);
- *pwc = (uint32_t)(uint8_t)c0;
- return 1;
- }
- sz = u8_seqlen(&c0)-1;
- if(!isutf(c0) || sz > 3)
- return 0;
- if(ios_readprep(s, sz) < sz){- // NOTE: this returns EOF even though some bytes are available
- // so we do not set s->_eof on this code path
- return IOS_EOF;
- }
- if(u8_isvalid(&s->buf[s->bpos], sz+1)){- size_t i = s->bpos;
- *pwc = u8_nextchar(s->buf, &i);
- ios_read(s, buf, sz+1);
- return 1;
- }
- return 0;
+	c = chartorune(r, buf); // length 1 is what chartorune reports for malformed input
+	return c == 1 && *r == Runeerror ? 0 : 1; // a well-formed 3-byte U+FFFD is valid data, not an error
}
int
-ios_peekutf8(ios_t *s, uint32_t *pwc)
+ios_pututf8(ios_t *s, Rune r)
{- int c;
- size_t sz;
- char c0;
-
- c = ios_peekc(s);
- if(c == IOS_EOF)
- return IOS_EOF;
- c0 = (char)c;
- if((uint8_t)c0 < 0x80){- *pwc = (uint32_t)(uint8_t)c0;
- return 1;
- }
- sz = u8_seqlen(&c0)-1;
- if(!isutf(c0) || sz > 3)
- return 0;
- if(ios_readprep(s, sz) < sz)
- return IOS_EOF;
- if(u8_isvalid(&s->buf[s->bpos], sz+1)){- size_t i = s->bpos;
- *pwc = u8_nextchar(s->buf, &i);
- return 1;
- }
- return 0;
-}
-
-int
-ios_pututf8(ios_t *s, uint32_t wc)
-{- char buf[8];
- if(wc < 0x80)
- return ios_putc((int)wc, s);
- size_t n = u8_toutf8(buf, 8, &wc, 1);
- return ios_write(s, buf, n);
+ char buf[UTFmax];
+ return ios_write(s, buf, runetochar(buf, &r));
}
void
--- a/ios.h
+++ b/ios.h
@@ -95,7 +95,7 @@
void ios_init_stdstreams(void);
/* high-level functions - output */
-int ios_pututf8(ios_t *s, uint32_t wc);
+int ios_pututf8(ios_t *s, Rune r);
int ios_printf(ios_t *s, const char *format, ...);
int ios_vprintf(ios_t *s, const char *format, va_list args);
@@ -102,8 +102,7 @@
void hexdump(ios_t *dest, const char *buffer, size_t len, size_t startoffs);
/* high-level stream functions - input */
-int ios_getutf8(ios_t *s, uint32_t *pwc);
-int ios_peekutf8(ios_t *s, uint32_t *pwc);
+int ios_getutf8(ios_t *s, Rune *r);
// discard data buffered for reading
void ios_purge(ios_t *s);
--- a/iostream.c
+++ b/iostream.c
@@ -134,37 +134,24 @@
{argcount(nargs, 1);
ios_t *s = fl_toiostream(args[0]);
- uint32_t wc;
+ Rune r;
int res;
- if((res = ios_getutf8(s, &wc)) == IOS_EOF)
+ if((res = ios_getutf8(s, &r)) == IOS_EOF)
//lerrorf(IOError, "end of file reached");
return FL_EOF;
if(res == 0)
lerrorf(IOError, "invalid UTF-8 sequence");
- return mk_wchar(wc);
+ return mk_rune(r);
}
-BUILTIN("io.peekc", io_peekc)-{- argcount(nargs, 1);
- ios_t *s = fl_toiostream(args[0]);
- uint32_t wc;
- int res;
- if((res = ios_peekutf8(s, &wc)) == IOS_EOF)
- return FL_EOF;
- if(res == 0)
- lerrorf(IOError, "invalid UTF-8 sequence");
- return mk_wchar(wc);
-}
-
BUILTIN("io.putc", io_putc) {argcount(nargs, 2);
ios_t *s = fl_toiostream(args[0]);
- if(!iscprim(args[1]) || ((cprim_t*)ptr(args[1]))->type != wchartype)
- type_error("wchar", args[1]);- uint32_t wc = *(uint32_t*)cp_data((cprim_t*)ptr(args[1]));
- return fixnum(ios_pututf8(s, wc));
+ if(!iscprim(args[1]) || ((cprim_t*)ptr(args[1]))->type != runetype)
+ type_error("rune", args[1]);+ Rune r = *(Rune*)cp_data((cprim_t*)ptr(args[1]));
+ return fixnum(ios_pututf8(s, r));
}
BUILTIN("io.skip", io_skip)@@ -281,11 +268,11 @@
if(nargs < 2 || nargs > 4)
argcount(nargs, 2);
ios_t *s = fl_toiostream(args[0]);
- if(iscprim(args[1]) && ((cprim_t*)ptr(args[1]))->type == wchartype){+ if(iscprim(args[1]) && ((cprim_t*)ptr(args[1]))->type == runetype){if(nargs > 2)
lerrorf(ArgError, "offset argument not supported for characters");
- uint32_t wc = *(uint32_t*)cp_data(ptr(args[1]));
- return fixnum(ios_pututf8(s, wc));
+ Rune r = *(Rune*)cp_data(ptr(args[1]));
+ return fixnum(ios_pututf8(s, r));
}
char *data;
size_t sz, offs = 0;
@@ -320,8 +307,8 @@
{size_t uldelim = toulong(arg);
if(uldelim > 0x7f){- // wchars > 0x7f, or anything else > 0xff, are out of range
- if((iscprim(arg) && cp_class(ptr(arg)) == wchartype) || uldelim > 0xff)
+ // runes > 0x7f, or anything else > 0xff, are out of range
+ if((iscprim(arg) && cp_class(ptr(arg)) == runetype) || uldelim > 0xff)
lerrorf(ArgError, "delimiter out of range");
}
return (char)uldelim;
--- a/operators.h
+++ b/operators.h
@@ -14,6 +14,7 @@
uint64_t conv_to_uint64(void *data, numerictype_t tag);
int32_t conv_to_int32(void *data, numerictype_t tag);
uint32_t conv_to_uint32(void *data, numerictype_t tag);
+Rune conv_to_Rune(void *data, numerictype_t tag);
#if defined(ULONG64)
#define conv_to_long conv_to_int64
--- a/plan9/platform.h
+++ b/plan9/platform.h
@@ -100,7 +100,6 @@
typedef uintptr uintptr_t;
typedef intptr ssize_t;
typedef uintptr size_t;
-typedef Rune wchar_t;
typedef enum { false, true } bool;-int wcwidth(wchar_t c);
+int wcwidth(Rune c);
--- a/print.c
+++ b/print.c
@@ -227,7 +227,7 @@
// get the width of an expression if we can do so cheaply
if(issymbol(v))
return u8_strwidth(symbol_name(v));
- if(iscprim(v) && ptr(v) != nil && cp_class((cprim_t*)ptr(v)) == wchartype)
+ if(iscprim(v) && ptr(v) != nil && cp_class((cprim_t*)ptr(v)) == runetype)
return 4;
return -1;
}
@@ -642,37 +642,32 @@
HPOS += ios_printf(f, "0x%hhx", ch);
else
HPOS += ios_printf(f, "#byte(0x%hhx)", ch);
- }else if(type == wcharsym){- uint32_t wc = *(uint32_t*)data;
- char seq[8];
- size_t nb = u8_toutf8(seq, sizeof(seq), &wc, 1);
+ }else if(type == runesym){+ Rune r = *(Rune*)data;
+ char seq[UTFmax+1];
+ int nb = runetochar(seq, &r);
seq[nb] = '\0';
if(print_princ){- // TODO: better multibyte handling
- if(wc == 0)
- ios_putc(0, f);
- else
- outs(seq, f);
+ outsn(seq, f, nb);
}else{ outsn("#\\", f, 2);- switch(wc){+ switch(r){ case 0x00: outsn("nul", f, 3); break; case 0x07: outsn("alarm", f, 5); break; case 0x08: outsn("backspace", f, 9); break; case 0x09: outsn("tab", f, 3); break;- case 'l': outsn("linefeed", f, 8); break; case 0x0a: outsn("newline", f, 7); break;- case 0x0B: outsn("vtab", f, 4); break;- case 0x0C: outsn("page", f, 4); break;- case 0x0D: outsn("return", f, 6); break;- case 0x1B: outsn("esc", f, 3); break;- case 's': outsn("space", f, 5); break;- case 0x7F: outsn("delete", f, 6); break;+ case 0x0b: outsn("vtab", f, 4); break;+ case 0x0c: outsn("page", f, 4); break;+ case 0x0d: outsn("return", f, 6); break;+ case 0x1b: outsn("esc", f, 3); break;+ case ' ': outsn("space", f, 5); break;+ case 0x7f: outsn("delete", f, 6); break;default:
- if(u8_iswprint(wc))
+ if(u8_iswprint(r))
outs(seq, f);
else
- HPOS += ios_printf(f, "x%04x", (int)wc);
+ HPOS += ios_printf(f, "x%04x", r);
break;
}
}
@@ -771,9 +766,12 @@
print_string(f, (char*)data, len);
}
return;
- }else if(eltype == wcharsym){- // TODO wchar
+ }else if(eltype == runesym){+ char buf[UTFmax];
+ print_string(f, buf, runetochar(buf, (Rune*)data));
}else{+ /* FIXME */
+ assert(0 == 1);
}
size_t i;
if(!weak){--- a/read.c
+++ b/read.c
@@ -314,7 +314,7 @@
lerrorf(ParseError, "unknown character #\\%s", buf);
}
toktype = TOK_NUM;
- tokval = mk_wchar(cval);
+ tokval = mk_rune(cval);
}else if(c == '('){toktype = TOK_SHARPOPEN;
}else if(c == '<'){@@ -482,7 +482,7 @@
size_t i = 0, j, sz = 64, ndig;
int c;
value_t s;
- uint32_t wc = 0;
+ Rune r = 0;
buf = malloc(sz);
while(1){@@ -518,9 +518,9 @@
ios_getc(F);
}
eseq[j] = '\0';
- wc = strtol(eseq, nil, 8);
+ r = strtol(eseq, nil, 8);
// \DDD and \xXX read bytes, not characters
- buf[i++] = ((char)wc);
+ buf[i++] = (char)r;
}else if((c == 'x' && (ndig = 2)) || (c == 'u' && (ndig = 4)) || (c == 'U' && (ndig = 8))){ while(1){c = ios_peekc(F);
@@ -531,15 +531,15 @@
}
eseq[j] = '\0';
if(j)
- wc = strtol(eseq, nil, 16);
- if(!j || wc > 0x10ffff){+ r = strtol(eseq, nil, 16);
+ if(!j || r > 0x10ffff){free(buf);
lerrorf(ParseError, "invalid escape sequence");
}
if(ndig == 2)
- buf[i++] = ((char)wc);
+ buf[i++] = (char)r;
else
- i += u8_wc_toutf8(&buf[i], wc);
+ i += runetochar(&buf[i], &r);
}else{char esc = read_escape_control_char((char)c);
if(esc == (char)c && !strchr("\\'\"`", esc)){--- a/string.c
+++ b/string.c
@@ -45,8 +45,8 @@
argcount(nargs, 1);
if(iscprim(args[0])){cprim_t *cp = ptr(args[0]);
- if(cp_class(cp) == wchartype){- int w = wcwidth(*(wchar_t*)cp_data(cp));
+ if(cp_class(cp) == runetype){+ int w = wcwidth(*(Rune*)cp_data(cp));
return w < 0 ? FL_F : fixnum(w);
}
}
@@ -70,17 +70,18 @@
if(iscvalue(args[0])){cvalue_t *cv = ptr(args[0]);
fltype_t *t = cv_class(cv);
- if(t->eltype == wchartype){- size_t nc = cv_len(cv) / sizeof(uint32_t);
- uint32_t *ptr = (uint32_t*)cv_data(cv);
- size_t nbytes = u8_codingsize(ptr, nc);
- value_t str = cvalue_string(nbytes);
- ptr = cv_data(ptr(args[0])); // relocatable pointer
- u8_toutf8(cvalue_data(str), nbytes, ptr, nc);
+		if(t->eltype == runetype){
+			size_t nr = cv_len(cv) / sizeof(Rune);
+			size_t nb = runenlen((Rune*)cv_data(cv), nr);
+			value_t str = cvalue_string(nb);
+			Rune *r = cv_data(ptr(args[0])); // relocatable pointer: fetch only after allocating str
+			char *s = cvalue_data(str);
+			for(size_t i = 0; i < nr; i++)
+				s += runetochar(s, r+i);
return str;
}
}
- type_error("wchar array", args[0]);+ type_error("rune array", args[0]);}
BUILTIN("string.decode", string_decode)@@ -95,17 +96,18 @@
cvalue_t *cv = ptr(args[0]);
char *ptr = (char*)cv_data(cv);
size_t nb = cv_len(cv);
- size_t nc = u8_charnum(ptr, nb);
- size_t newsz = nc*sizeof(uint32_t);
+ size_t nc = utfnlen(ptr, nb);
+ size_t newsz = nc*sizeof(Rune);
if(term)
- newsz += sizeof(uint32_t);
- value_t wcstr = cvalue(wcstringtype, newsz);
+ newsz += sizeof(Rune);
+ value_t runestr = cvalue(runestringtype, newsz);
ptr = cv_data(ptr(args[0])); // relocatable pointer
- uint32_t *pwc = cvalue_data(wcstr);
- u8_toucs(pwc, nc, ptr, nb);
+	Rune *r = cvalue_data(runestr);
+	for(size_t i = 0; i < nc; i++) // nc runes were counted by utfnlen; nb is the byte length
+		ptr += chartorune(r+i, ptr);
 	if(term)
-		pwc[nc] = 0;
-	return wcstr;
+		r[nc] = 0; // terminator goes in the extra Rune slot added to newsz
+	return runestr;
}
extern BUILTIN("buffer", buffer);@@ -212,7 +214,9 @@
size_t sl = u8_seqlen(&s[i]);
if(sl > len || i > len-sl)
bounds_error(args[0], args[1]);
- return mk_wchar(u8_nextchar(s, &i));
+ Rune r;
+ chartorune(&r, s+i);
+ return mk_rune(r);
}
BUILTIN("char.upcase", char_upcase)@@ -219,9 +223,9 @@
{argcount(nargs, 1);
cprim_t *cp = (cprim_t*)ptr(args[0]);
- if(!iscprim(args[0]) || cp_class(cp) != wchartype)
- type_error("wchar", args[0]);- return mk_wchar(towupper(*(int32_t*)cp_data(cp)));
+ if(!iscprim(args[0]) || cp_class(cp) != runetype)
+ type_error("rune", args[0]);+ return mk_rune(toupperrune(*(Rune*)cp_data(cp)));
}
BUILTIN("char.downcase", char_downcase)@@ -228,9 +232,9 @@
{argcount(nargs, 1);
cprim_t *cp = ptr(args[0]);
- if(!iscprim(args[0]) || cp_class(cp) != wchartype)
- type_error("wchar", args[0]);- return mk_wchar(towlower(*(int32_t*)cp_data(cp)));
+ if(!iscprim(args[0]) || cp_class(cp) != runetype)
+ type_error("rune", args[0]);+ return mk_rune(tolowerrune(*(Rune*)cp_data(cp)));
}
BUILTIN("char-alphabetic?", char_alphabeticp)@@ -237,23 +241,14 @@
{argcount(nargs, 1);
cprim_t *cp = (cprim_t*)ptr(args[0]);
- if(!iscprim(args[0]) || cp_class(cp) != wchartype)
- type_error("wchar", args[0]);- return iswalpha(*(int32_t*)cp_data(cp)) ? FL_T : FL_F;
+ if(!iscprim(args[0]) || cp_class(cp) != runetype)
+ type_error("rune", args[0]);+ return isalpharune(*(Rune*)cp_data(cp)) ? FL_T : FL_F;
}
-static value_t
-mem_find_byte(char *s, char c, size_t start, size_t len)
-{- char *p = memchr(s+start, c, len-start);
- if(p == nil)
- return FL_F;
- return size_wrap((size_t)(p - s));
-}
-
BUILTIN("string.find", string_find) {- char cbuf[8];
+ char cbuf[UTFmax+1];
size_t start = 0;
if(nargs == 3)
start = toulong(args[2]);
@@ -267,14 +262,16 @@
value_t v = args[1];
cprim_t *cp = ptr(v);
- if(iscprim(v) && cp_class(cp) == wchartype){- uint32_t c = *(uint32_t*)cp_data(cp);
- if(c <= 0x7f)
- return mem_find_byte(s, (char)c, start, len);
- needlesz = u8_toutf8(cbuf, sizeof(cbuf), &c, 1);
+ if(iscprim(v) && cp_class(cp) == runetype){+ Rune r = *(Rune*)cp_data(cp);
+ needlesz = runetochar(cbuf, &r);
needle = cbuf;
+ needle[needlesz] = 0;
}else if(iscprim(v) && cp_class(cp) == bytetype){- return mem_find_byte(s, *(char*)cp_data(cp), start, len);
+ needlesz = 1;
+ needle = cbuf;
+ needle[0] = *(char*)cp_data(cp);
+ needle[needlesz] = 0;
}else if(fl_isstring(v)){cvalue_t *cv = (cvalue_t*)ptr(v);
needlesz = cv_len(cv);
@@ -284,8 +281,6 @@
}
if(needlesz > len-start)
return FL_F;
- if(needlesz == 1)
- return mem_find_byte(s, needle[0], start, len);
if(needlesz == 0)
return size_wrap(start);
size_t i;
--- a/system.lsp
+++ b/system.lsp
@@ -144,7 +144,7 @@
(define (min x0 . xs)
(if (null? xs) x0
(foldl (λ (a b) (if (< a b) a b)) x0 xs)))
-(define (char? x) (eq? (typeof x) 'wchar))
+(define (char? x) (eq? (typeof x) 'rune))
(define (array? x) (or (vector? x)
(let ((t (typeof x)))
(and (pair? t) (eq? (car t) 'array)))))
@@ -679,7 +679,7 @@
(define (string.tail s n) (string.sub s (string.inc s 0 n)))
(define *whitespace*
- (string.encode #array(wchar 9 10 11 12 13 32 133 160 5760 6158 8192
+ (string.encode #array(rune 9 10 11 12 13 32 133 160 5760 6158 8192
8193 8194 8195 8196 8197 8198 8199 8200
8201 8202 8232 8233 8239 8287 12288)))
--- a/test/unittest.lsp
+++ b/test/unittest.lsp
@@ -77,7 +77,7 @@
(assert (equal? (uint64 (double -123)) #uint64(0xffffffffffffff85)))
-(assert (equal? (string 'sym #byte(65) #wchar(945) "blah") "symA\u03B1blah"))
+(assert (equal? (string 'sym #byte(65) #rune(945) "blah") "symA\u03B1blah"))
(assert (= (length (string #\x0)) 1))
(assert (> 9223372036854775808 9223372036854775807))
--- a/utf8.c
+++ b/utf8.c
@@ -49,148 +49,6 @@
return trailingBytesForUTF8[(unsigned int)(uint8_t)s[0]] + 1;
}
-/* returns the # of bytes needed to encode a certain character
- 0 means the character cannot (or should not) be encoded. */
-size_t
-u8_charlen(uint32_t ch)
-{- if(ch < 0x80)
- return 1;
- if(ch < 0x800)
- return 2;
- if(ch < 0x10000)
- return 3;
- if(ch < 0x110000)
- return 4;
- return 0;
-}
-
-size_t
-u8_codingsize(uint32_t *wcstr, size_t n)
-{- size_t i, c = 0;
-
- for(i = 0; i < n; i++)
- c += u8_charlen(wcstr[i]);
- return c;
-}
-
-/* conversions without error checking
- only works for valid UTF-8, i.e. no 5- or 6-byte sequences
- srcsz = source size in bytes
- sz = dest size in # of wide characters
-
- returns # characters converted
- if sz == srcsz+1 (i.e. 4*srcsz+4 bytes), there will always be enough space.
-*/
-size_t
-u8_toucs(uint32_t *dest, size_t sz, const char *src, size_t srcsz)
-{- uint32_t ch;
- const char *src_end = src + srcsz;
- size_t nb, i = 0;
-
- if(sz == 0 || srcsz == 0)
- return 0;
-
- while(i < sz){- if(!isutf(*src)){ // invalid sequence- dest[i++] = 0xFFFD;
- src++;
- if(src >= src_end)
- break;
- continue;
- }
- nb = trailingBytesForUTF8[(uint8_t)*src];
- if(src + nb >= src_end)
- break;
- ch = 0;
- switch(nb){- case 5: ch += (uint8_t)*src++; ch <<= 6; // fallthrough
- case 4: ch += (uint8_t)*src++; ch <<= 6; // fallthrough
- case 3: ch += (uint8_t)*src++; ch <<= 6; // fallthrough
- case 2: ch += (uint8_t)*src++; ch <<= 6; // fallthrough
- case 1: ch += (uint8_t)*src++; ch <<= 6; // fallthrough
- case 0: ch += (uint8_t)*src++;
- }
- ch -= offsetsFromUTF8[nb];
- dest[i++] = ch;
- }
- return i;
-}
-
-/*
- * srcsz = number of source characters
- * sz = size of dest buffer in bytes
- * returns # bytes stored in dest
- * the destination string will never be bigger than the source string.
-*/
-size_t
-u8_toutf8(char *dest, size_t sz, const uint32_t *src, size_t srcsz)
-{- uint32_t ch;
- size_t i = 0;
- char *dest0 = dest;
- char *dest_end = dest + sz;
-
- while(i < srcsz){- ch = src[i];
- if(ch < 0x80){- if(dest >= dest_end)
- break;
- *dest++ = (char)ch;
- }else if(ch < 0x800){- if(dest >= dest_end-1)
- break;
- *dest++ = (ch>>6) | 0xC0;
- *dest++ = (ch & 0x3F) | 0x80;
- }else if(ch < 0x10000){- if(dest >= dest_end-2)
- break;
- *dest++ = (ch>>12) | 0xE0;
- *dest++ = ((ch>>6) & 0x3F) | 0x80;
- *dest++ = (ch & 0x3F) | 0x80;
- }else if(ch < 0x110000){- if(dest >= dest_end-3)
- break;
- *dest++ = (ch>>18) | 0xF0;
- *dest++ = ((ch>>12) & 0x3F) | 0x80;
- *dest++ = ((ch>>6) & 0x3F) | 0x80;
- *dest++ = (ch & 0x3F) | 0x80;
- }
- i++;
- }
- return dest-dest0;
-}
-
-size_t
-u8_wc_toutf8(char *dest, uint32_t ch)
-{- if(ch < 0x80){- dest[0] = (char)ch;
- return 1;
- }
- if(ch < 0x800){- dest[0] = (ch>>6) | 0xC0;
- dest[1] = (ch & 0x3F) | 0x80;
- return 2;
- }
- if(ch < 0x10000){- dest[0] = (ch>>12) | 0xE0;
- dest[1] = ((ch>>6) & 0x3F) | 0x80;
- dest[2] = (ch & 0x3F) | 0x80;
- return 3;
- }
- if(ch < 0x110000){- dest[0] = (ch>>18) | 0xF0;
- dest[1] = ((ch>>12) & 0x3F) | 0x80;
- dest[2] = ((ch>>6) & 0x3F) | 0x80;
- dest[3] = (ch & 0x3F) | 0x80;
- return 4;
- }
- return 0;
-}
-
/* byte offset => charnum */
size_t
u8_charnum(const char *s, size_t offset)
@@ -208,56 +66,16 @@
size_t
u8_strwidth(const char *s)
{- uint32_t ch;
- size_t nb, tot = 0;
- int w;
- signed char sc;
+ size_t i, w;
+ Rune r;
- while((sc = (signed char)*s) != 0){- if(sc >= 0){- s++;
- if(sc)
- tot++;
- }else{- if(!isutf(sc)){- tot++;
- s++;
- continue;
- }
- nb = trailingBytesForUTF8[(uint8_t)sc];
- ch = 0;
- switch(nb){- case 5: ch += (uint8_t)*s++; ch <<= 6; // fallthrough
- case 4: ch += (uint8_t)*s++; ch <<= 6; // fallthrough
- case 3: ch += (uint8_t)*s++; ch <<= 6; // fallthrough
- case 2: ch += (uint8_t)*s++; ch <<= 6; // fallthrough
- case 1: ch += (uint8_t)*s++; ch <<= 6; // fallthrough
- case 0: ch += (uint8_t)*s++;
- }
- ch -= offsetsFromUTF8[nb];
- w = wcwidth(ch); // might return -1
- if(w > 0)
- tot += w;
- }
+	for(i = w = 0; s[i];){
+		i += chartorune(&r, s+i);
+		w += wcwidth(r) > 0 ? wcwidth(r) : 0; // wcwidth is -1 for control chars; never subtract from size_t w
}
- return tot;
+ return w;
}
-/* reads the next utf-8 sequence out of a string, updating an index */
-uint32_t
-u8_nextchar(const char *s, size_t *i)
-{- uint32_t ch = 0;
- size_t sz = 0;
-
- do{- ch <<= 6;
- ch += (uint8_t)s[(*i)];
- sz++;
- }while(s[*i] && (++(*i)) && !isutf(s[*i]));
- return ch - offsetsFromUTF8[sz-1];
-}
-
/* next character without NUL character terminator */
uint32_t
u8_nextmemchar(const char *s, size_t *i)
@@ -311,7 +129,7 @@
}
int
-u8_escape_wchar(char *buf, size_t sz, uint32_t ch)
+u8_escape_rune(char *buf, size_t sz, Rune ch)
{assert(sz > 2);
if(ch >= 0x20 && ch < 0x7f){@@ -358,7 +176,7 @@
i0 = i;
ch = u8_nextmemchar(src, &i);
if(ascii || !u8_iswprint(ch)){- buf += u8_escape_wchar(buf, sz - (buf-start), ch);
+ buf += u8_escape_rune(buf, sz - (buf-start), ch);
}else{i = i0;
do{--- a/utf8.h
+++ b/utf8.h
@@ -6,21 +6,9 @@
int u8_iswprint(uint32_t c);
-/* convert UTF-8 data to wide character */
-size_t u8_toucs(uint32_t *dest, size_t sz, const char *src, size_t srcsz);
-
-/* the opposite conversion */
-size_t u8_toutf8(char *dest, size_t sz, const uint32_t *src, size_t srcsz);
-
-/* single character to UTF-8, returns # bytes written */
-size_t u8_wc_toutf8(char *dest, uint32_t ch);
-
/* byte offset to character number */
size_t u8_charnum(const char *s, size_t offset);
-/* return next character, updating an index variable */
-uint32_t u8_nextchar(const char *s, size_t *i);
-
/* next character without NUL character terminator */
uint32_t u8_nextmemchar(const char *s, size_t *i);
@@ -27,18 +15,12 @@
/* returns length of next utf-8 sequence */
size_t u8_seqlen(const char *s);
-/* returns the # of bytes needed to encode a certain character */
-size_t u8_charlen(uint32_t ch);
-
-/* computes the # of bytes needed to encode a WC string as UTF-8 */
-size_t u8_codingsize(uint32_t *wcstr, size_t n);
-
char read_escape_control_char(char c);
/* given a wide character, convert it to an ASCII escape sequence stored in
buf, where buf is "sz" bytes. returns the number of characters output.
sz must be at least 3. */
-int u8_escape_wchar(char *buf, size_t sz, uint32_t ch);
+int u8_escape_rune(char *buf, size_t sz, Rune ch);
/* convert UTF-8 "src" to escape sequences.
--
⑨