Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Path: blob/master/data/jtr/dumb32.conf
Views: 11766
# This software is Copyright (c) 2012-2020 magnum, and it is hereby1# released to the general public under the following terms:2# Redistribution and use in source and binary forms, with or without3# modification, are permitted.4#5# Generic implementation of "dumb" exhaustive search of FULL Unicode.6# Default is to try *all* allocated characters in Unicode v13 (there's7# 143,532 of them). Even if a fast format can exhaust two characters in one8# hour, three characters would take 12 years...9#10# Note that these modes will handle --max-len differently than normal: They11# will consider number of characters as opposed to number of bytes. This12# means you can naturally just use e.g. --max-len=3 for generating all13# three-character candidates (which may be up to 12 bytes each).14#15# Also note that for UTF-16 formats, the resulting plaintext size within the16# format will be up to four bytes (two 16-bit words) due to use of surrogates17# for characters above U+FFFF. This means a format which normally handles up18# to 27 characters may be limited to only 13 characters, worst case.19#20# Note that the (newer) cracking mode --subsets=full-unicode is way faster than21# this external mode, although not as easy to adapt to smaller portions of the22# Unicode space. See doc/SUBSETS2324[List.External:Dumb32]25int maxlength; // Maximum password length to try26int last; // Last character position, zero-based27int lastid; // Character index in the last position28int id[0x7f]; // Current character indices for other positions29int charset[0x24000], c0; // Characters3031void init()32{33int minlength;34int i, c;3536# Trigger UTF-32 handling in External mode37utf32 = 1;3839if (req_minlen)40minlength = req_minlen;41else42minlength = 1;43if (req_maxlen)44maxlength = req_maxlen;45else46maxlength = 2;4748/*49* This defines the character set. 
This is auto-generated from UnicodeData.txt50* and we skip control characters.51*/52i = 0;53// 0000..007F; Basic Latin54c = 0x20; // from SPACE55while (c <= 0x7e) // ..to TILDE56charset[i++] = c++;57// 0080..00FF; Latin-1 Supplement58c = 0xa0; // from NO-BREAK SPACE59while (c <= 0xff) // ..to LATIN SMALL LETTER Y WITH DIAERESIS60charset[i++] = c++;61// 0100..017F; Latin Extended-A62c = 0x100; // from LATIN CAPITAL LETTER A WITH MACRON63while (c <= 0x17f) // ..to LATIN SMALL LETTER LONG S64charset[i++] = c++;65// 0180..024F; Latin Extended-B66c = 0x180; // from LATIN SMALL LETTER B WITH STROKE67while (c <= 0x24f) // ..to LATIN SMALL LETTER Y WITH STROKE68charset[i++] = c++;69// 0250..02AF; IPA Extensions70c = 0x250; // from LATIN SMALL LETTER TURNED A71while (c <= 0x2af) // ..to LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL72charset[i++] = c++;73// 02B0..02FF; Spacing Modifier Letters74c = 0x2b0; // from MODIFIER LETTER SMALL H75while (c <= 0x2ff) // ..to MODIFIER LETTER LOW LEFT ARROW76charset[i++] = c++;77// 0300..036F; Combining Diacritical Marks78c = 0x300; // from COMBINING GRAVE ACCENT79while (c <= 0x36f) // ..to COMBINING LATIN SMALL LETTER X80charset[i++] = c++;81// 0370..03FF; Greek and Coptic82c = 0x370; // from GREEK CAPITAL LETTER HETA83while (c <= 0x377) // ..to GREEK SMALL LETTER PAMPHYLIAN DIGAMMA84charset[i++] = c++;85c = 0x37a; // from GREEK YPOGEGRAMMENI86while (c <= 0x37f) // ..to GREEK CAPITAL LETTER YOT87charset[i++] = c++;88c = 0x384; // from GREEK TONOS89while (c <= 0x38a) // ..to GREEK CAPITAL LETTER IOTA WITH TONOS90charset[i++] = c++;91c = 0x38e; // from GREEK CAPITAL LETTER UPSILON WITH TONOS92while (c <= 0x3a1) // ..to GREEK CAPITAL LETTER RHO93charset[i++] = c++;94c = 0x3a3; // from GREEK CAPITAL LETTER SIGMA95while (c <= 0x3ff) // ..to GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL96charset[i++] = c++;97// 0400..04FF; Cyrillic98c = 0x400; // from CYRILLIC CAPITAL LETTER IE WITH GRAVE99while (c <= 0x4ff) // ..to CYRILLIC SMALL 
LETTER HA WITH STROKE100charset[i++] = c++;101// 0500..052F; Cyrillic Supplement102c = 0x500; // from CYRILLIC CAPITAL LETTER KOMI DE103while (c <= 0x52f) // ..to CYRILLIC SMALL LETTER EL WITH DESCENDER104charset[i++] = c++;105// 0530..058F; Armenian106c = 0x531; // from ARMENIAN CAPITAL LETTER AYB107while (c <= 0x556) // ..to ARMENIAN CAPITAL LETTER FEH108charset[i++] = c++;109c = 0x559; // from ARMENIAN MODIFIER LETTER LEFT HALF RING110while (c <= 0x58a) // ..to ARMENIAN HYPHEN111charset[i++] = c++;112charset[i++] = 0x58d; // RIGHT-FACING ARMENIAN ETERNITY SIGN113charset[i++] = 0x58f; // ARMENIAN DRAM SIGN114// 0590..05FF; Hebrew115c = 0x591; // from HEBREW ACCENT ETNAHTA116while (c <= 0x5c7) // ..to HEBREW POINT QAMATS QATAN117charset[i++] = c++;118c = 0x5d0; // from HEBREW LETTER ALEF119while (c <= 0x5ea) // ..to HEBREW LETTER TAV120charset[i++] = c++;121c = 0x5ef; // from HEBREW YOD TRIANGLE122while (c <= 0x5f4) // ..to HEBREW PUNCTUATION GERSHAYIM123charset[i++] = c++;124// 0600..06FF; Arabic125c = 0x600; // from ARABIC NUMBER SIGN126while (c <= 0x61c) // ..to ARABIC LETTER MARK127charset[i++] = c++;128c = 0x61e; // from ARABIC TRIPLE DOT PUNCTUATION MARK129while (c <= 0x6ff) // ..to ARABIC LETTER HEH WITH INVERTED V130charset[i++] = c++;131// 0700..074F; Syriac132c = 0x700; // from SYRIAC END OF PARAGRAPH133while (c <= 0x70d) // ..to SYRIAC HARKLEAN ASTERISCUS134charset[i++] = c++;135c = 0x70f; // from SYRIAC ABBREVIATION MARK136while (c <= 0x74a) // ..to SYRIAC BARREKH137charset[i++] = c++;138charset[i++] = 0x74d; // SYRIAC LETTER SOGDIAN ZHAIN139charset[i++] = 0x74f; // SYRIAC LETTER SOGDIAN FE140// 0750..077F; Arabic Supplement141c = 0x750; // from ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW142while (c <= 0x77f) // ..to ARABIC LETTER KAF WITH TWO DOTS ABOVE143charset[i++] = c++;144// 0780..07BF; Thaana145c = 0x780; // from THAANA LETTER HAA146while (c <= 0x7b1) // ..to THAANA LETTER NAA147charset[i++] = c++;148// 07C0..07FF; NKo149c = 0x7c0; // 
from NKO DIGIT ZERO150while (c <= 0x7fa) // ..to NKO LAJANYALAN151charset[i++] = c++;152charset[i++] = 0x7fd; // NKO DANTAYALAN153charset[i++] = 0x7ff; // NKO TAMAN SIGN154// 0800..083F; Samaritan155c = 0x800; // from SAMARITAN LETTER ALAF156while (c <= 0x82d) // ..to SAMARITAN MARK NEQUDAA157charset[i++] = c++;158c = 0x830; // from SAMARITAN PUNCTUATION NEQUDAA159while (c <= 0x83e) // ..to SAMARITAN PUNCTUATION ANNAAU160charset[i++] = c++;161// 0840..085F; Mandaic162c = 0x840; // from MANDAIC LETTER HALQA163while (c <= 0x85b) // ..to MANDAIC GEMINATION MARK164charset[i++] = c++;165charset[i++] = 0x85e; // MANDAIC PUNCTUATION166// 0860..086F; Syriac Supplement167c = 0x860; // from SYRIAC LETTER MALAYALAM NGA168while (c <= 0x86a) // ..to SYRIAC LETTER MALAYALAM SSA169charset[i++] = c++;170// 08A0..08FF; Arabic Extended-A171c = 0x8a0; // from ARABIC LETTER BEH WITH SMALL V BELOW172while (c <= 0x8b4) // ..to ARABIC LETTER KAF WITH DOT BELOW173charset[i++] = c++;174c = 0x8b6; // from ARABIC LETTER BEH WITH SMALL MEEM ABOVE175while (c <= 0x8c7) // ..to ARABIC LETTER LAM WITH SMALL ARABIC LETTER TAH ABOVE176charset[i++] = c++;177c = 0x8d3; // from ARABIC SMALL LOW WAW178while (c <= 0x8ff) // ..to ARABIC MARK SIDEWAYS NOON GHUNNA179charset[i++] = c++;180// 0900..097F; Devanagari181c = 0x900; // from DEVANAGARI SIGN INVERTED CANDRABINDU182while (c <= 0x97f) // ..to DEVANAGARI LETTER BBA183charset[i++] = c++;184// 0980..09FF; Bengali185c = 0x980; // from BENGALI ANJI186while (c <= 0x983) // ..to BENGALI SIGN VISARGA187charset[i++] = c++;188c = 0x985; // from BENGALI LETTER A189while (c <= 0x98c) // ..to BENGALI LETTER VOCALIC L190charset[i++] = c++;191charset[i++] = 0x98f; // BENGALI LETTER E192charset[i++] = 0x990; // BENGALI LETTER AI193c = 0x993; // from BENGALI LETTER O194while (c <= 0x9a8) // ..to BENGALI LETTER NA195charset[i++] = c++;196c = 0x9aa; // from BENGALI LETTER PA197while (c <= 0x9b0) // ..to BENGALI LETTER RA198charset[i++] = c++;199c = 0x9b6; // from 
BENGALI LETTER SHA200while (c <= 0x9b9) // ..to BENGALI LETTER HA201charset[i++] = c++;202c = 0x9bc; // from BENGALI SIGN NUKTA203while (c <= 0x9c4) // ..to BENGALI VOWEL SIGN VOCALIC RR204charset[i++] = c++;205charset[i++] = 0x9c7; // BENGALI VOWEL SIGN E206charset[i++] = 0x9c8; // BENGALI VOWEL SIGN AI207c = 0x9cb; // from BENGALI VOWEL SIGN O208while (c <= 0x9ce) // ..to BENGALI LETTER KHANDA TA209charset[i++] = c++;210charset[i++] = 0x9dc; // BENGALI LETTER RRA211charset[i++] = 0x9dd; // BENGALI LETTER RHA212c = 0x9df; // from BENGALI LETTER YYA213while (c <= 0x9e3) // ..to BENGALI VOWEL SIGN VOCALIC LL214charset[i++] = c++;215c = 0x9e6; // from BENGALI DIGIT ZERO216while (c <= 0x9fe) // ..to BENGALI SANDHI MARK217charset[i++] = c++;218// 0A00..0A7F; Gurmukhi219charset[i++] = 0xa01; // GURMUKHI SIGN ADAK BINDI220charset[i++] = 0xa03; // GURMUKHI SIGN VISARGA221c = 0xa05; // from GURMUKHI LETTER A222while (c <= 0xa0a) // ..to GURMUKHI LETTER UU223charset[i++] = c++;224charset[i++] = 0xa0f; // GURMUKHI LETTER EE225charset[i++] = 0xa10; // GURMUKHI LETTER AI226c = 0xa13; // from GURMUKHI LETTER OO227while (c <= 0xa28) // ..to GURMUKHI LETTER NA228charset[i++] = c++;229c = 0xa2a; // from GURMUKHI LETTER PA230while (c <= 0xa30) // ..to GURMUKHI LETTER RA231charset[i++] = c++;232charset[i++] = 0xa32; // GURMUKHI LETTER LA233charset[i++] = 0xa33; // GURMUKHI LETTER LLA234charset[i++] = 0xa35; // GURMUKHI LETTER VA235charset[i++] = 0xa36; // GURMUKHI LETTER SHA236charset[i++] = 0xa38; // GURMUKHI LETTER SA237charset[i++] = 0xa39; // GURMUKHI LETTER HA238c = 0xa3e; // from GURMUKHI VOWEL SIGN AA239while (c <= 0xa42) // ..to GURMUKHI VOWEL SIGN UU240charset[i++] = c++;241charset[i++] = 0xa47; // GURMUKHI VOWEL SIGN EE242charset[i++] = 0xa48; // GURMUKHI VOWEL SIGN AI243charset[i++] = 0xa4b; // GURMUKHI VOWEL SIGN OO244charset[i++] = 0xa4d; // GURMUKHI SIGN VIRAMA245c = 0xa59; // from GURMUKHI LETTER KHHA246while (c <= 0xa5c) // ..to GURMUKHI LETTER RRA247charset[i++] = 
c++;248c = 0xa66; // from GURMUKHI DIGIT ZERO249while (c <= 0xa76) // ..to GURMUKHI ABBREVIATION SIGN250charset[i++] = c++;251// 0A80..0AFF; Gujarati252charset[i++] = 0xa81; // GUJARATI SIGN CANDRABINDU253charset[i++] = 0xa83; // GUJARATI SIGN VISARGA254c = 0xa85; // from GUJARATI LETTER A255while (c <= 0xa8d) // ..to GUJARATI VOWEL CANDRA E256charset[i++] = c++;257charset[i++] = 0xa8f; // GUJARATI LETTER E258charset[i++] = 0xa91; // GUJARATI VOWEL CANDRA O259c = 0xa93; // from GUJARATI LETTER O260while (c <= 0xaa8) // ..to GUJARATI LETTER NA261charset[i++] = c++;262c = 0xaaa; // from GUJARATI LETTER PA263while (c <= 0xab0) // ..to GUJARATI LETTER RA264charset[i++] = c++;265charset[i++] = 0xab2; // GUJARATI LETTER LA266charset[i++] = 0xab3; // GUJARATI LETTER LLA267c = 0xab5; // from GUJARATI LETTER VA268while (c <= 0xab9) // ..to GUJARATI LETTER HA269charset[i++] = c++;270c = 0xabc; // from GUJARATI SIGN NUKTA271while (c <= 0xac5) // ..to GUJARATI VOWEL SIGN CANDRA E272charset[i++] = c++;273charset[i++] = 0xac7; // GUJARATI VOWEL SIGN E274charset[i++] = 0xac9; // GUJARATI VOWEL SIGN CANDRA O275charset[i++] = 0xacb; // GUJARATI VOWEL SIGN O276charset[i++] = 0xacd; // GUJARATI SIGN VIRAMA277c = 0xae0; // from GUJARATI LETTER VOCALIC RR278while (c <= 0xae3) // ..to GUJARATI VOWEL SIGN VOCALIC LL279charset[i++] = c++;280c = 0xae6; // from GUJARATI DIGIT ZERO281while (c <= 0xaf1) // ..to GUJARATI RUPEE SIGN282charset[i++] = c++;283c = 0xaf9; // from GUJARATI LETTER ZHA284while (c <= 0xaff) // ..to GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE285charset[i++] = c++;286// 0B00..0B7F; Oriya287charset[i++] = 0xb01; // ORIYA SIGN CANDRABINDU288charset[i++] = 0xb03; // ORIYA SIGN VISARGA289c = 0xb05; // from ORIYA LETTER A290while (c <= 0xb0c) // ..to ORIYA LETTER VOCALIC L291charset[i++] = c++;292charset[i++] = 0xb0f; // ORIYA LETTER E293charset[i++] = 0xb10; // ORIYA LETTER AI294c = 0xb13; // from ORIYA LETTER O295while (c <= 0xb28) // ..to ORIYA LETTER NA296charset[i++] = c++;297c 
= 0xb2a; // from ORIYA LETTER PA298while (c <= 0xb30) // ..to ORIYA LETTER RA299charset[i++] = c++;300charset[i++] = 0xb32; // ORIYA LETTER LA301charset[i++] = 0xb33; // ORIYA LETTER LLA302c = 0xb35; // from ORIYA LETTER VA303while (c <= 0xb39) // ..to ORIYA LETTER HA304charset[i++] = c++;305c = 0xb3c; // from ORIYA SIGN NUKTA306while (c <= 0xb44) // ..to ORIYA VOWEL SIGN VOCALIC RR307charset[i++] = c++;308charset[i++] = 0xb47; // ORIYA VOWEL SIGN E309charset[i++] = 0xb48; // ORIYA VOWEL SIGN AI310charset[i++] = 0xb4b; // ORIYA VOWEL SIGN O311charset[i++] = 0xb4d; // ORIYA SIGN VIRAMA312charset[i++] = 0xb55; // ORIYA SIGN OVERLINE313charset[i++] = 0xb57; // ORIYA AU LENGTH MARK314charset[i++] = 0xb5c; // ORIYA LETTER RRA315charset[i++] = 0xb5d; // ORIYA LETTER RHA316c = 0xb5f; // from ORIYA LETTER YYA317while (c <= 0xb63) // ..to ORIYA VOWEL SIGN VOCALIC LL318charset[i++] = c++;319c = 0xb66; // from ORIYA DIGIT ZERO320while (c <= 0xb77) // ..to ORIYA FRACTION THREE SIXTEENTHS321charset[i++] = c++;322// 0B80..0BFF; Tamil323charset[i++] = 0xb82; // TAMIL SIGN ANUSVARA324charset[i++] = 0xb83; // TAMIL SIGN VISARGA325c = 0xb85; // from TAMIL LETTER A326while (c <= 0xb8a) // ..to TAMIL LETTER UU327charset[i++] = c++;328charset[i++] = 0xb8e; // TAMIL LETTER E329charset[i++] = 0xb90; // TAMIL LETTER AI330c = 0xb92; // from TAMIL LETTER O331while (c <= 0xb95) // ..to TAMIL LETTER KA332charset[i++] = c++;333charset[i++] = 0xb99; // TAMIL LETTER NGA334charset[i++] = 0xb9a; // TAMIL LETTER CA335charset[i++] = 0xb9e; // TAMIL LETTER NYA336charset[i++] = 0xb9f; // TAMIL LETTER TTA337charset[i++] = 0xba3; // TAMIL LETTER NNA338charset[i++] = 0xba4; // TAMIL LETTER TA339charset[i++] = 0xba8; // TAMIL LETTER NA340charset[i++] = 0xbaa; // TAMIL LETTER PA341c = 0xbae; // from TAMIL LETTER MA342while (c <= 0xbb9) // ..to TAMIL LETTER HA343charset[i++] = c++;344c = 0xbbe; // from TAMIL VOWEL SIGN AA345while (c <= 0xbc2) // ..to TAMIL VOWEL SIGN UU346charset[i++] = c++;347charset[i++] 
= 0xbc6; // TAMIL VOWEL SIGN E348charset[i++] = 0xbc8; // TAMIL VOWEL SIGN AI349c = 0xbca; // from TAMIL VOWEL SIGN O350while (c <= 0xbcd) // ..to TAMIL SIGN VIRAMA351charset[i++] = c++;352c = 0xbe6; // from TAMIL DIGIT ZERO353while (c <= 0xbfa) // ..to TAMIL NUMBER SIGN354charset[i++] = c++;355// 0C00..0C7F; Telugu356c = 0xc00; // from TELUGU SIGN COMBINING CANDRABINDU ABOVE357while (c <= 0xc0c) // ..to TELUGU LETTER VOCALIC L358charset[i++] = c++;359charset[i++] = 0xc0e; // TELUGU LETTER E360charset[i++] = 0xc10; // TELUGU LETTER AI361c = 0xc12; // from TELUGU LETTER O362while (c <= 0xc28) // ..to TELUGU LETTER NA363charset[i++] = c++;364c = 0xc2a; // from TELUGU LETTER PA365while (c <= 0xc39) // ..to TELUGU LETTER HA366charset[i++] = c++;367c = 0xc3d; // from TELUGU SIGN AVAGRAHA368while (c <= 0xc44) // ..to TELUGU VOWEL SIGN VOCALIC RR369charset[i++] = c++;370charset[i++] = 0xc46; // TELUGU VOWEL SIGN E371charset[i++] = 0xc48; // TELUGU VOWEL SIGN AI372c = 0xc4a; // from TELUGU VOWEL SIGN O373while (c <= 0xc4d) // ..to TELUGU SIGN VIRAMA374charset[i++] = c++;375charset[i++] = 0xc55; // TELUGU LENGTH MARK376charset[i++] = 0xc56; // TELUGU AI LENGTH MARK377charset[i++] = 0xc58; // TELUGU LETTER TSA378charset[i++] = 0xc5a; // TELUGU LETTER RRRA379c = 0xc60; // from TELUGU LETTER VOCALIC RR380while (c <= 0xc63) // ..to TELUGU VOWEL SIGN VOCALIC LL381charset[i++] = c++;382c = 0xc66; // from TELUGU DIGIT ZERO383while (c <= 0xc6f) // ..to TELUGU DIGIT NINE384charset[i++] = c++;385c = 0xc77; // from TELUGU SIGN SIDDHAM386while (c <= 0xc7f) // ..to TELUGU SIGN TUUMU387charset[i++] = c++;388// 0C80..0CFF; Kannada389c = 0xc80; // from KANNADA SIGN SPACING CANDRABINDU390while (c <= 0xc8c) // ..to KANNADA LETTER VOCALIC L391charset[i++] = c++;392charset[i++] = 0xc8e; // KANNADA LETTER E393charset[i++] = 0xc90; // KANNADA LETTER AI394c = 0xc92; // from KANNADA LETTER O395while (c <= 0xca8) // ..to KANNADA LETTER NA396charset[i++] = c++;397c = 0xcaa; // from KANNADA LETTER 
PA398while (c <= 0xcb3) // ..to KANNADA LETTER LLA399charset[i++] = c++;400c = 0xcb5; // from KANNADA LETTER VA401while (c <= 0xcb9) // ..to KANNADA LETTER HA402charset[i++] = c++;403c = 0xcbc; // from KANNADA SIGN NUKTA404while (c <= 0xcc4) // ..to KANNADA VOWEL SIGN VOCALIC RR405charset[i++] = c++;406charset[i++] = 0xcc6; // KANNADA VOWEL SIGN E407charset[i++] = 0xcc8; // KANNADA VOWEL SIGN AI408c = 0xcca; // from KANNADA VOWEL SIGN O409while (c <= 0xccd) // ..to KANNADA SIGN VIRAMA410charset[i++] = c++;411charset[i++] = 0xcd5; // KANNADA LENGTH MARK412charset[i++] = 0xcd6; // KANNADA AI LENGTH MARK413c = 0xce0; // from KANNADA LETTER VOCALIC RR414while (c <= 0xce3) // ..to KANNADA VOWEL SIGN VOCALIC LL415charset[i++] = c++;416c = 0xce6; // from KANNADA DIGIT ZERO417while (c <= 0xcef) // ..to KANNADA DIGIT NINE418charset[i++] = c++;419charset[i++] = 0xcf1; // KANNADA SIGN JIHVAMULIYA420charset[i++] = 0xcf2; // KANNADA SIGN UPADHMANIYA421// 0D00..0D7F; Malayalam422c = 0xd00; // from MALAYALAM SIGN COMBINING ANUSVARA ABOVE423while (c <= 0xd0c) // ..to MALAYALAM LETTER VOCALIC L424charset[i++] = c++;425charset[i++] = 0xd0e; // MALAYALAM LETTER E426charset[i++] = 0xd10; // MALAYALAM LETTER AI427c = 0xd12; // from MALAYALAM LETTER O428while (c <= 0xd44) // ..to MALAYALAM VOWEL SIGN VOCALIC RR429charset[i++] = c++;430charset[i++] = 0xd46; // MALAYALAM VOWEL SIGN E431charset[i++] = 0xd48; // MALAYALAM VOWEL SIGN AI432c = 0xd4a; // from MALAYALAM VOWEL SIGN O433while (c <= 0xd4f) // ..to MALAYALAM SIGN PARA434charset[i++] = c++;435c = 0xd54; // from MALAYALAM LETTER CHILLU M436while (c <= 0xd63) // ..to MALAYALAM VOWEL SIGN VOCALIC LL437charset[i++] = c++;438c = 0xd66; // from MALAYALAM DIGIT ZERO439while (c <= 0xd7f) // ..to MALAYALAM LETTER CHILLU K440charset[i++] = c++;441// 0D80..0DFF; Sinhala442charset[i++] = 0xd81; // SINHALA SIGN CANDRABINDU443charset[i++] = 0xd83; // SINHALA SIGN VISARGAYA444c = 0xd85; // from SINHALA LETTER AYANNA445while (c <= 0xd96) // ..to 
SINHALA LETTER AUYANNA446charset[i++] = c++;447c = 0xd9a; // from SINHALA LETTER ALPAPRAANA KAYANNA448while (c <= 0xdb1) // ..to SINHALA LETTER DANTAJA NAYANNA449charset[i++] = c++;450c = 0xdb3; // from SINHALA LETTER SANYAKA DAYANNA451while (c <= 0xdbb) // ..to SINHALA LETTER RAYANNA452charset[i++] = c++;453c = 0xdc0; // from SINHALA LETTER VAYANNA454while (c <= 0xdc6) // ..to SINHALA LETTER FAYANNA455charset[i++] = c++;456c = 0xdcf; // from SINHALA VOWEL SIGN AELA-PILLA457while (c <= 0xdd4) // ..to SINHALA VOWEL SIGN KETTI PAA-PILLA458charset[i++] = c++;459c = 0xdd8; // from SINHALA VOWEL SIGN GAETTA-PILLA460while (c <= 0xddf) // ..to SINHALA VOWEL SIGN GAYANUKITTA461charset[i++] = c++;462c = 0xde6; // from SINHALA LITH DIGIT ZERO463while (c <= 0xdef) // ..to SINHALA LITH DIGIT NINE464charset[i++] = c++;465charset[i++] = 0xdf2; // SINHALA VOWEL SIGN DIGA GAETTA-PILLA466charset[i++] = 0xdf4; // SINHALA PUNCTUATION KUNDDALIYA467// 0E00..0E7F; Thai468c = 0xe01; // from THAI CHARACTER KO KAI469while (c <= 0xe3a) // ..to THAI CHARACTER PHINTHU470charset[i++] = c++;471c = 0xe3f; // from THAI CURRENCY SYMBOL BAHT472while (c <= 0xe5b) // ..to THAI CHARACTER KHOMUT473charset[i++] = c++;474// 0E80..0EFF; Lao475charset[i++] = 0xe81; // LAO LETTER KO476charset[i++] = 0xe82; // LAO LETTER KHO SUNG477c = 0xe86; // from LAO LETTER PALI GHA478while (c <= 0xe8a) // ..to LAO LETTER SO TAM479charset[i++] = c++;480c = 0xe8c; // from LAO LETTER PALI JHA481while (c <= 0xea3) // ..to LAO LETTER LO LING482charset[i++] = c++;483c = 0xea7; // from LAO LETTER WO484while (c <= 0xebd) // ..to LAO SEMIVOWEL SIGN NYO485charset[i++] = c++;486c = 0xec0; // from LAO VOWEL SIGN E487while (c <= 0xec4) // ..to LAO VOWEL SIGN AI488charset[i++] = c++;489c = 0xec8; // from LAO TONE MAI EK490while (c <= 0xecd) // ..to LAO NIGGAHITA491charset[i++] = c++;492c = 0xed0; // from LAO DIGIT ZERO493while (c <= 0xed9) // ..to LAO DIGIT NINE494charset[i++] = c++;495c = 0xedc; // from LAO HO NO496while (c <= 
0xedf) // ..to LAO LETTER KHMU NYO497charset[i++] = c++;498// 0F00..0FFF; Tibetan499c = 0xf00; // from TIBETAN SYLLABLE OM500while (c <= 0xf47) // ..to TIBETAN LETTER JA501charset[i++] = c++;502c = 0xf49; // from TIBETAN LETTER NYA503while (c <= 0xf6c) // ..to TIBETAN LETTER RRA504charset[i++] = c++;505c = 0xf71; // from TIBETAN VOWEL SIGN AA506while (c <= 0xf97) // ..to TIBETAN SUBJOINED LETTER JA507charset[i++] = c++;508c = 0xf99; // from TIBETAN SUBJOINED LETTER NYA509while (c <= 0xfbc) // ..to TIBETAN SUBJOINED LETTER FIXED-FORM RA510charset[i++] = c++;511c = 0xfbe; // from TIBETAN KU RU KHA512while (c <= 0xfcc) // ..to TIBETAN SYMBOL NOR BU BZHI -KHYIL513charset[i++] = c++;514c = 0xfce; // from TIBETAN SIGN RDEL NAG RDEL DKAR515while (c <= 0xfda) // ..to TIBETAN MARK TRAILING MCHAN RTAGS516charset[i++] = c++;517// 1000..109F; Myanmar518c = 0x1000; // from MYANMAR LETTER KA519while (c <= 0x109f) // ..to MYANMAR SYMBOL SHAN EXCLAMATION520charset[i++] = c++;521// 10A0..10FF; Georgian522c = 0x10a0; // from GEORGIAN CAPITAL LETTER AN523while (c <= 0x10c5) // ..to GEORGIAN CAPITAL LETTER HOE524charset[i++] = c++;525c = 0x10d0; // from GEORGIAN LETTER AN526while (c <= 0x10ff) // ..to GEORGIAN LETTER LABIAL SIGN527charset[i++] = c++;528// 1100..11FF; Hangul Jamo529c = 0x1100; // from HANGUL CHOSEONG KIYEOK530while (c <= 0x11ff) // ..to HANGUL JONGSEONG SSANGNIEUN531charset[i++] = c++;532// 1200..137F; Ethiopic533c = 0x1200; // from ETHIOPIC SYLLABLE HA534while (c <= 0x1248) // ..to ETHIOPIC SYLLABLE QWA535charset[i++] = c++;536c = 0x124a; // from ETHIOPIC SYLLABLE QWI537while (c <= 0x124d) // ..to ETHIOPIC SYLLABLE QWE538charset[i++] = c++;539c = 0x1250; // from ETHIOPIC SYLLABLE QHA540while (c <= 0x1256) // ..to ETHIOPIC SYLLABLE QHO541charset[i++] = c++;542c = 0x125a; // from ETHIOPIC SYLLABLE QHWI543while (c <= 0x125d) // ..to ETHIOPIC SYLLABLE QHWE544charset[i++] = c++;545c = 0x1260; // from ETHIOPIC SYLLABLE BA546while (c <= 0x1288) // ..to ETHIOPIC SYLLABLE 
XWA547charset[i++] = c++;548c = 0x128a; // from ETHIOPIC SYLLABLE XWI549while (c <= 0x128d) // ..to ETHIOPIC SYLLABLE XWE550charset[i++] = c++;551c = 0x1290; // from ETHIOPIC SYLLABLE NA552while (c <= 0x12b0) // ..to ETHIOPIC SYLLABLE KWA553charset[i++] = c++;554c = 0x12b2; // from ETHIOPIC SYLLABLE KWI555while (c <= 0x12b5) // ..to ETHIOPIC SYLLABLE KWE556charset[i++] = c++;557c = 0x12b8; // from ETHIOPIC SYLLABLE KXA558while (c <= 0x12be) // ..to ETHIOPIC SYLLABLE KXO559charset[i++] = c++;560c = 0x12c2; // from ETHIOPIC SYLLABLE KXWI561while (c <= 0x12c5) // ..to ETHIOPIC SYLLABLE KXWE562charset[i++] = c++;563c = 0x12c8; // from ETHIOPIC SYLLABLE WA564while (c <= 0x12d6) // ..to ETHIOPIC SYLLABLE PHARYNGEAL O565charset[i++] = c++;566c = 0x12d8; // from ETHIOPIC SYLLABLE ZA567while (c <= 0x1310) // ..to ETHIOPIC SYLLABLE GWA568charset[i++] = c++;569c = 0x1312; // from ETHIOPIC SYLLABLE GWI570while (c <= 0x1315) // ..to ETHIOPIC SYLLABLE GWE571charset[i++] = c++;572c = 0x1318; // from ETHIOPIC SYLLABLE GGA573while (c <= 0x135a) // ..to ETHIOPIC SYLLABLE FYA574charset[i++] = c++;575c = 0x135d; // from ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK576while (c <= 0x137c) // ..to ETHIOPIC NUMBER TEN THOUSAND577charset[i++] = c++;578// 1380..139F; Ethiopic Supplement579c = 0x1380; // from ETHIOPIC SYLLABLE SEBATBEIT MWA580while (c <= 0x1399) // ..to ETHIOPIC TONAL MARK KURT581charset[i++] = c++;582// 13A0..13FF; Cherokee583c = 0x13a0; // from CHEROKEE LETTER A584while (c <= 0x13f5) // ..to CHEROKEE LETTER MV585charset[i++] = c++;586c = 0x13f8; // from CHEROKEE SMALL LETTER YE587while (c <= 0x13fd) // ..to CHEROKEE SMALL LETTER MV588charset[i++] = c++;589// 1400..167F; Unified Canadian Aboriginal Syllabics590c = 0x1400; // from CANADIAN SYLLABICS HYPHEN591while (c <= 0x167f) // ..to CANADIAN SYLLABICS BLACKFOOT W592charset[i++] = c++;593// 1680..169F; Ogham594c = 0x1680; // from OGHAM SPACE MARK595while (c <= 0x169c) // ..to OGHAM REVERSED FEATHER 
MARK596charset[i++] = c++;597// 16A0..16FF; Runic598c = 0x16a0; // from RUNIC LETTER FEHU FEOH FE F599while (c <= 0x16f8) // ..to RUNIC LETTER FRANKS CASKET AESC600charset[i++] = c++;601// 1700..171F; Tagalog602c = 0x1700; // from TAGALOG LETTER A603while (c <= 0x170c) // ..to TAGALOG LETTER YA604charset[i++] = c++;605c = 0x170e; // from TAGALOG LETTER LA606while (c <= 0x1714) // ..to TAGALOG SIGN VIRAMA607charset[i++] = c++;608// 1720..173F; Hanunoo609c = 0x1720; // from HANUNOO LETTER A610while (c <= 0x1736) // ..to PHILIPPINE DOUBLE PUNCTUATION611charset[i++] = c++;612// 1740..175F; Buhid613c = 0x1740; // from BUHID LETTER A614while (c <= 0x1753) // ..to BUHID VOWEL SIGN U615charset[i++] = c++;616// 1760..177F; Tagbanwa617c = 0x1760; // from TAGBANWA LETTER A618while (c <= 0x176c) // ..to TAGBANWA LETTER YA619charset[i++] = c++;620charset[i++] = 0x176e; // TAGBANWA LETTER LA621charset[i++] = 0x1770; // TAGBANWA LETTER SA622charset[i++] = 0x1772; // TAGBANWA VOWEL SIGN I623charset[i++] = 0x1773; // TAGBANWA VOWEL SIGN U624// 1780..17FF; Khmer625c = 0x1780; // from KHMER LETTER KA626while (c <= 0x17dd) // ..to KHMER SIGN ATTHACAN627charset[i++] = c++;628c = 0x17e0; // from KHMER DIGIT ZERO629while (c <= 0x17e9) // ..to KHMER DIGIT NINE630charset[i++] = c++;631c = 0x17f0; // from KHMER SYMBOL LEK ATTAK SON632while (c <= 0x17f9) // ..to KHMER SYMBOL LEK ATTAK PRAM-BUON633charset[i++] = c++;634// 1800..18AF; Mongolian635c = 0x1800; // from MONGOLIAN BIRGA636while (c <= 0x180e) // ..to MONGOLIAN VOWEL SEPARATOR637charset[i++] = c++;638c = 0x1810; // from MONGOLIAN DIGIT ZERO639while (c <= 0x1819) // ..to MONGOLIAN DIGIT NINE640charset[i++] = c++;641c = 0x1820; // from MONGOLIAN LETTER A642while (c <= 0x1878) // ..to MONGOLIAN LETTER CHA WITH TWO DOTS643charset[i++] = c++;644c = 0x1880; // from MONGOLIAN LETTER ALI GALI ANUSVARA ONE645while (c <= 0x18aa) // ..to MONGOLIAN LETTER MANCHU ALI GALI LHA646charset[i++] = c++;647// 18B0..18FF; Unified Canadian Aboriginal 
Syllabics Extended648c = 0x18b0; // from CANADIAN SYLLABICS OY649while (c <= 0x18f5) // ..to CANADIAN SYLLABICS CARRIER DENTAL S650charset[i++] = c++;651// 1900..194F; Limbu652c = 0x1900; // from LIMBU VOWEL-CARRIER LETTER653while (c <= 0x191e) // ..to LIMBU LETTER TRA654charset[i++] = c++;655c = 0x1920; // from LIMBU VOWEL SIGN A656while (c <= 0x192b) // ..to LIMBU SUBJOINED LETTER WA657charset[i++] = c++;658c = 0x1930; // from LIMBU SMALL LETTER KA659while (c <= 0x193b) // ..to LIMBU SIGN SA-I660charset[i++] = c++;661c = 0x1944; // from LIMBU EXCLAMATION MARK662while (c <= 0x194f) // ..to LIMBU DIGIT NINE663charset[i++] = c++;664// 1950..197F; Tai Le665c = 0x1950; // from TAI LE LETTER KA666while (c <= 0x196d) // ..to TAI LE LETTER AI667charset[i++] = c++;668c = 0x1970; // from TAI LE LETTER TONE-2669while (c <= 0x1974) // ..to TAI LE LETTER TONE-6670charset[i++] = c++;671// 1980..19DF; New Tai Lue672c = 0x1980; // from NEW TAI LUE LETTER HIGH QA673while (c <= 0x19ab) // ..to NEW TAI LUE LETTER LOW SUA674charset[i++] = c++;675c = 0x19b0; // from NEW TAI LUE VOWEL SIGN VOWEL SHORTENER676while (c <= 0x19c9) // ..to NEW TAI LUE TONE MARK-2677charset[i++] = c++;678c = 0x19d0; // from NEW TAI LUE DIGIT ZERO679while (c <= 0x19da) // ..to NEW TAI LUE THAM DIGIT ONE680charset[i++] = c++;681charset[i++] = 0x19de; // NEW TAI LUE SIGN LAE682charset[i++] = 0x19df; // NEW TAI LUE SIGN LAEV683// 19E0..19FF; Khmer Symbols684c = 0x19e0; // from KHMER SYMBOL PATHAMASAT685while (c <= 0x19ff) // ..to KHMER SYMBOL DAP-PRAM ROC686charset[i++] = c++;687// 1A00..1A1F; Buginese688c = 0x1a00; // from BUGINESE LETTER KA689while (c <= 0x1a1b) // ..to BUGINESE VOWEL SIGN AE690charset[i++] = c++;691charset[i++] = 0x1a1e; // BUGINESE PALLAWA692charset[i++] = 0x1a1f; // BUGINESE END OF SECTION693// 1A20..1AAF; Tai Tham694c = 0x1a20; // from TAI THAM LETTER HIGH KA695while (c <= 0x1a5e) // ..to TAI THAM CONSONANT SIGN SA696charset[i++] = c++;697c = 0x1a60; // from TAI THAM SIGN SAKOT698while (c 
<= 0x1a7c) // ..to TAI THAM SIGN KHUEN-LUE KARAN699charset[i++] = c++;700c = 0x1a7f; // from TAI THAM COMBINING CRYPTOGRAMMIC DOT701while (c <= 0x1a89) // ..to TAI THAM HORA DIGIT NINE702charset[i++] = c++;703c = 0x1a90; // from TAI THAM THAM DIGIT ZERO704while (c <= 0x1a99) // ..to TAI THAM THAM DIGIT NINE705charset[i++] = c++;706c = 0x1aa0; // from TAI THAM SIGN WIANG707while (c <= 0x1aad) // ..to TAI THAM SIGN CAANG708charset[i++] = c++;709// 1AB0..1AFF; Combining Diacritical Marks Extended710c = 0x1ab0; // from COMBINING DOUBLED CIRCUMFLEX ACCENT711while (c <= 0x1ac0) // ..to COMBINING LATIN SMALL LETTER TURNED W BELOW712charset[i++] = c++;713// 1B00..1B7F; Balinese714c = 0x1b00; // from BALINESE SIGN ULU RICEM715while (c <= 0x1b4b) // ..to BALINESE LETTER ASYURA SASAK716charset[i++] = c++;717c = 0x1b50; // from BALINESE DIGIT ZERO718while (c <= 0x1b7c) // ..to BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING719charset[i++] = c++;720// 1B80..1BBF; Sundanese721c = 0x1b80; // from SUNDANESE SIGN PANYECEK722while (c <= 0x1bbf) // ..to SUNDANESE LETTER FINAL M723charset[i++] = c++;724// 1BC0..1BFF; Batak725c = 0x1bc0; // from BATAK LETTER A726while (c <= 0x1bf3) // ..to BATAK PANONGONAN727charset[i++] = c++;728c = 0x1bfc; // from BATAK SYMBOL BINDU NA METEK729while (c <= 0x1bff) // ..to BATAK SYMBOL BINDU PANGOLAT730charset[i++] = c++;731// 1C00..1C4F; Lepcha732c = 0x1c00; // from LEPCHA LETTER KA733while (c <= 0x1c37) // ..to LEPCHA SIGN NUKTA734charset[i++] = c++;735c = 0x1c3b; // from LEPCHA PUNCTUATION TA-ROL736while (c <= 0x1c49) // ..to LEPCHA DIGIT NINE737charset[i++] = c++;738charset[i++] = 0x1c4d; // LEPCHA LETTER TTA739charset[i++] = 0x1c4f; // LEPCHA LETTER DDA740// 1C50..1C7F; Ol Chiki741c = 0x1c50; // from OL CHIKI DIGIT ZERO742while (c <= 0x1c7f) // ..to OL CHIKI PUNCTUATION DOUBLE MUCAAD743charset[i++] = c++;744// 1C80..1C8F; Cyrillic Extended-C745c = 0x1c80; // from CYRILLIC SMALL LETTER ROUNDED VE746while (c <= 0x1c88) // ..to CYRILLIC SMALL LETTER 
UNBLENDED UK747charset[i++] = c++;748// 1C90..1CBF; Georgian Extended749c = 0x1c90; // from GEORGIAN MTAVRULI CAPITAL LETTER AN750while (c <= 0x1cba) // ..to GEORGIAN MTAVRULI CAPITAL LETTER AIN751charset[i++] = c++;752charset[i++] = 0x1cbd; // GEORGIAN MTAVRULI CAPITAL LETTER AEN753charset[i++] = 0x1cbf; // GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN754// 1CC0..1CCF; Sundanese Supplement755c = 0x1cc0; // from SUNDANESE PUNCTUATION BINDU SURYA756while (c <= 0x1cc7) // ..to SUNDANESE PUNCTUATION BINDU BA SATANGA757charset[i++] = c++;758// 1CD0..1CFF; Vedic Extensions759c = 0x1cd0; // from VEDIC TONE KARSHANA760while (c <= 0x1cfa) // ..to VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA761charset[i++] = c++;762// 1D00..1D7F; Phonetic Extensions763c = 0x1d00; // from LATIN LETTER SMALL CAPITAL A764while (c <= 0x1d7f) // ..to LATIN SMALL LETTER UPSILON WITH STROKE765charset[i++] = c++;766// 1D80..1DBF; Phonetic Extensions Supplement767c = 0x1d80; // from LATIN SMALL LETTER B WITH PALATAL HOOK768while (c <= 0x1dbf) // ..to MODIFIER LETTER SMALL THETA769charset[i++] = c++;770// 1DC0..1DFF; Combining Diacritical Marks Supplement771c = 0x1dc0; // from COMBINING DOTTED GRAVE ACCENT772while (c <= 0x1df9) // ..to COMBINING WIDE INVERTED BRIDGE BELOW773charset[i++] = c++;774c = 0x1dfb; // from COMBINING DELETION MARK775while (c <= 0x1dff) // ..to COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW776charset[i++] = c++;777// 1E00..1EFF; Latin Extended Additional778c = 0x1e00; // from LATIN CAPITAL LETTER A WITH RING BELOW779while (c <= 0x1eff) // ..to LATIN SMALL LETTER Y WITH LOOP780charset[i++] = c++;781// 1F00..1FFF; Greek Extended782c = 0x1f00; // from GREEK SMALL LETTER ALPHA WITH PSILI783while (c <= 0x1f15) // ..to GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA784charset[i++] = c++;785c = 0x1f18; // from GREEK CAPITAL LETTER EPSILON WITH PSILI786while (c <= 0x1f1d) // ..to GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA787charset[i++] = c++;788c = 0x1f20; // from GREEK SMALL 
LETTER ETA WITH PSILI789while (c <= 0x1f45) // ..to GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA790charset[i++] = c++;791c = 0x1f48; // from GREEK CAPITAL LETTER OMICRON WITH PSILI792while (c <= 0x1f4d) // ..to GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA793charset[i++] = c++;794c = 0x1f50; // from GREEK SMALL LETTER UPSILON WITH PSILI795while (c <= 0x1f57) // ..to GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI796charset[i++] = c++;797c = 0x1f5f; // from GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI798while (c <= 0x1f7d) // ..to GREEK SMALL LETTER OMEGA WITH OXIA799charset[i++] = c++;800c = 0x1f80; // from GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI801while (c <= 0x1fb4) // ..to GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI802charset[i++] = c++;803c = 0x1fb6; // from GREEK SMALL LETTER ALPHA WITH PERISPOMENI804while (c <= 0x1fc4) // ..to GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI805charset[i++] = c++;806c = 0x1fc6; // from GREEK SMALL LETTER ETA WITH PERISPOMENI807while (c <= 0x1fd3) // ..to GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA808charset[i++] = c++;809c = 0x1fd6; // from GREEK SMALL LETTER IOTA WITH PERISPOMENI810while (c <= 0x1fdb) // ..to GREEK CAPITAL LETTER IOTA WITH OXIA811charset[i++] = c++;812c = 0x1fdd; // from GREEK DASIA AND VARIA813while (c <= 0x1fef) // ..to GREEK VARIA814charset[i++] = c++;815charset[i++] = 0x1ff2; // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI816charset[i++] = 0x1ff4; // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI817c = 0x1ff6; // from GREEK SMALL LETTER OMEGA WITH PERISPOMENI818while (c <= 0x1ffe) // ..to GREEK DASIA819charset[i++] = c++;820// 2000..206F; General Punctuation821c = 0x2000; // from EN QUAD822while (c <= 0x2064) // ..to INVISIBLE PLUS823charset[i++] = c++;824c = 0x2066; // from LEFT-TO-RIGHT ISOLATE825while (c <= 0x206f) // ..to NOMINAL DIGIT SHAPES826charset[i++] = c++;827// 2070..209F; Superscripts and Subscripts828charset[i++] = 0x2070; // 
SUPERSCRIPT ZERO829charset[i++] = 0x2071; // SUPERSCRIPT LATIN SMALL LETTER I830c = 0x2074; // from SUPERSCRIPT FOUR831while (c <= 0x208e) // ..to SUBSCRIPT RIGHT PARENTHESIS832charset[i++] = c++;833c = 0x2090; // from LATIN SUBSCRIPT SMALL LETTER A834while (c <= 0x209c) // ..to LATIN SUBSCRIPT SMALL LETTER T835charset[i++] = c++;836// 20A0..20CF; Currency Symbols837c = 0x20a0; // from EURO-CURRENCY SIGN838while (c <= 0x20bf) // ..to BITCOIN SIGN839charset[i++] = c++;840// 20D0..20FF; Combining Diacritical Marks for Symbols841c = 0x20d0; // from COMBINING LEFT HARPOON ABOVE842while (c <= 0x20f0) // ..to COMBINING ASTERISK ABOVE843charset[i++] = c++;844// 2100..214F; Letterlike Symbols845c = 0x2100; // from ACCOUNT OF846while (c <= 0x214f) // ..to SYMBOL FOR SAMARITAN SOURCE847charset[i++] = c++;848// 2150..218F; Number Forms849c = 0x2150; // from VULGAR FRACTION ONE SEVENTH850while (c <= 0x218b) // ..to TURNED DIGIT THREE851charset[i++] = c++;852// 2190..21FF; Arrows853c = 0x2190; // from LEFTWARDS ARROW854while (c <= 0x21ff) // ..to LEFT RIGHT OPEN-HEADED ARROW855charset[i++] = c++;856// 2200..22FF; Mathematical Operators857c = 0x2200; // from FOR ALL858while (c <= 0x22ff) // ..to Z NOTATION BAG MEMBERSHIP859charset[i++] = c++;860// 2300..23FF; Miscellaneous Technical861c = 0x2300; // from DIAMETER SIGN862while (c <= 0x23ff) // ..to OBSERVER EYE SYMBOL863charset[i++] = c++;864// 2400..243F; Control Pictures865c = 0x2400; // from SYMBOL FOR NULL866while (c <= 0x2426) // ..to SYMBOL FOR SUBSTITUTE FORM TWO867charset[i++] = c++;868// 2440..245F; Optical Character Recognition869c = 0x2440; // from OCR HOOK870while (c <= 0x244a) // ..to OCR DOUBLE BACKSLASH871charset[i++] = c++;872// 2460..24FF; Enclosed Alphanumerics873c = 0x2460; // from CIRCLED DIGIT ONE874while (c <= 0x24ff) // ..to NEGATIVE CIRCLED DIGIT ZERO875charset[i++] = c++;876// 2500..257F; Box Drawing877c = 0x2500; // from BOX DRAWINGS LIGHT HORIZONTAL878while (c <= 0x257f) // ..to BOX DRAWINGS HEAVY UP 
AND LIGHT DOWN879charset[i++] = c++;880// 2580..259F; Block Elements881c = 0x2580; // from UPPER HALF BLOCK882while (c <= 0x259f) // ..to QUADRANT UPPER RIGHT AND LOWER LEFT AND LOWER RIGHT883charset[i++] = c++;884// 25A0..25FF; Geometric Shapes885c = 0x25a0; // from BLACK SQUARE886while (c <= 0x25ff) // ..to LOWER RIGHT TRIANGLE887charset[i++] = c++;888// 2600..26FF; Miscellaneous Symbols889c = 0x2600; // from BLACK SUN WITH RAYS890while (c <= 0x26ff) // ..to WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE891charset[i++] = c++;892// 2700..27BF; Dingbats893c = 0x2700; // from BLACK SAFETY SCISSORS894while (c <= 0x27bf) // ..to DOUBLE CURLY LOOP895charset[i++] = c++;896// 27C0..27EF; Miscellaneous Mathematical Symbols-A897c = 0x27c0; // from THREE DIMENSIONAL ANGLE898while (c <= 0x27ef) // ..to MATHEMATICAL RIGHT FLATTENED PARENTHESIS899charset[i++] = c++;900// 27F0..27FF; Supplemental Arrows-A901c = 0x27f0; // from UPWARDS QUADRUPLE ARROW902while (c <= 0x27ff) // ..to LONG RIGHTWARDS SQUIGGLE ARROW903charset[i++] = c++;904// 2800..28FF; Braille Patterns905c = 0x2800; // from BRAILLE PATTERN BLANK906while (c <= 0x28ff) // ..to BRAILLE PATTERN DOTS-12345678907charset[i++] = c++;908// 2900..297F; Supplemental Arrows-B909c = 0x2900; // from RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE910while (c <= 0x297f) // ..to DOWN FISH TAIL911charset[i++] = c++;912// 2980..29FF; Miscellaneous Mathematical Symbols-B913c = 0x2980; // from TRIPLE VERTICAL BAR DELIMITER914while (c <= 0x29ff) // ..to MINY915charset[i++] = c++;916// 2A00..2AFF; Supplemental Mathematical Operators917c = 0x2a00; // from N-ARY CIRCLED DOT OPERATOR918while (c <= 0x2aff) // ..to N-ARY WHITE VERTICAL BAR919charset[i++] = c++;920// 2B00..2BFF; Miscellaneous Symbols and Arrows921c = 0x2b00; // from NORTH EAST WHITE ARROW922while (c <= 0x2b73) // ..to DOWNWARDS TRIANGLE-HEADED ARROW TO BAR923charset[i++] = c++;924c = 0x2b76; // from NORTH WEST TRIANGLE-HEADED ARROW TO BAR925while (c <= 0x2b95) // ..to 
RIGHTWARDS BLACK ARROW926charset[i++] = c++;927c = 0x2b97; // from SYMBOL FOR TYPE A ELECTRONICS928while (c <= 0x2bff) // ..to HELLSCHREIBER PAUSE SYMBOL929charset[i++] = c++;930// 2C00..2C5F; Glagolitic931c = 0x2c00; // from GLAGOLITIC CAPITAL LETTER AZU932while (c <= 0x2c2e) // ..to GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE933charset[i++] = c++;934c = 0x2c30; // from GLAGOLITIC SMALL LETTER AZU935while (c <= 0x2c5e) // ..to GLAGOLITIC SMALL LETTER LATINATE MYSLITE936charset[i++] = c++;937// 2C60..2C7F; Latin Extended-C938c = 0x2c60; // from LATIN CAPITAL LETTER L WITH DOUBLE BAR939while (c <= 0x2c7f) // ..to LATIN CAPITAL LETTER Z WITH SWASH TAIL940charset[i++] = c++;941// 2C80..2CFF; Coptic942c = 0x2c80; // from COPTIC CAPITAL LETTER ALFA943while (c <= 0x2cf3) // ..to COPTIC SMALL LETTER BOHAIRIC KHEI944charset[i++] = c++;945c = 0x2cf9; // from COPTIC OLD NUBIAN FULL STOP946while (c <= 0x2cff) // ..to COPTIC MORPHOLOGICAL DIVIDER947charset[i++] = c++;948// 2D00..2D2F; Georgian Supplement949c = 0x2d00; // from GEORGIAN SMALL LETTER AN950while (c <= 0x2d25) // ..to GEORGIAN SMALL LETTER HOE951charset[i++] = c++;952c = 0x2d27; // from GEORGIAN SMALL LETTER YN953while (c <= 0x2d2d) // ..to GEORGIAN SMALL LETTER AEN954charset[i++] = c++;955// 2D30..2D7F; Tifinagh956c = 0x2d30; // from TIFINAGH LETTER YA957while (c <= 0x2d67) // ..to TIFINAGH LETTER YO958charset[i++] = c++;959charset[i++] = 0x2d6f; // TIFINAGH MODIFIER LETTER LABIALIZATION MARK960charset[i++] = 0x2d70; // TIFINAGH SEPARATOR MARK961charset[i++] = 0x2d7f; // TIFINAGH CONSONANT JOINER962// 2D80..2DDF; Ethiopic Extended963c = 0x2d80; // from ETHIOPIC SYLLABLE LOA964while (c <= 0x2d96) // ..to ETHIOPIC SYLLABLE GGWE965charset[i++] = c++;966c = 0x2da0; // from ETHIOPIC SYLLABLE SSA967while (c <= 0x2da6) // ..to ETHIOPIC SYLLABLE SSO968charset[i++] = c++;969c = 0x2da8; // from ETHIOPIC SYLLABLE CCA970while (c <= 0x2dae) // ..to ETHIOPIC SYLLABLE CCO971charset[i++] = c++;972c = 0x2db0; // from ETHIOPIC 
SYLLABLE ZZA973while (c <= 0x2db6) // ..to ETHIOPIC SYLLABLE ZZO974charset[i++] = c++;975c = 0x2db8; // from ETHIOPIC SYLLABLE CCHA976while (c <= 0x2dbe) // ..to ETHIOPIC SYLLABLE CCHO977charset[i++] = c++;978c = 0x2dc0; // from ETHIOPIC SYLLABLE QYA979while (c <= 0x2dc6) // ..to ETHIOPIC SYLLABLE QYO980charset[i++] = c++;981c = 0x2dc8; // from ETHIOPIC SYLLABLE KYA982while (c <= 0x2dce) // ..to ETHIOPIC SYLLABLE KYO983charset[i++] = c++;984c = 0x2dd0; // from ETHIOPIC SYLLABLE XYA985while (c <= 0x2dd6) // ..to ETHIOPIC SYLLABLE XYO986charset[i++] = c++;987c = 0x2dd8; // from ETHIOPIC SYLLABLE GYA988while (c <= 0x2dde) // ..to ETHIOPIC SYLLABLE GYO989charset[i++] = c++;990// 2DE0..2DFF; Cyrillic Extended-A991c = 0x2de0; // from COMBINING CYRILLIC LETTER BE992while (c <= 0x2dff) // ..to COMBINING CYRILLIC LETTER IOTIFIED BIG YUS993charset[i++] = c++;994// 2E00..2E7F; Supplemental Punctuation995c = 0x2e00; // from RIGHT ANGLE SUBSTITUTION MARKER996while (c <= 0x2e52) // ..to TIRONIAN SIGN CAPITAL ET997charset[i++] = c++;998// 2E80..2EFF; CJK Radicals Supplement999c = 0x2e80; // from CJK RADICAL REPEAT1000while (c <= 0x2e99) // ..to CJK RADICAL RAP1001charset[i++] = c++;1002c = 0x2e9b; // from CJK RADICAL CHOKE1003while (c <= 0x2ef3) // ..to CJK RADICAL C-SIMPLIFIED TURTLE1004charset[i++] = c++;1005// 2F00..2FDF; Kangxi Radicals1006c = 0x2f00; // from KANGXI RADICAL ONE1007while (c <= 0x2fd5) // ..to KANGXI RADICAL FLUTE1008charset[i++] = c++;1009// 2FF0..2FFF; Ideographic Description Characters1010c = 0x2ff0; // from IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT1011while (c <= 0x2ffb) // ..to IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID1012charset[i++] = c++;1013// 3000..303F; CJK Symbols and Punctuation1014c = 0x3000; // from IDEOGRAPHIC SPACE1015while (c <= 0x303f) // ..to IDEOGRAPHIC HALF FILL SPACE1016charset[i++] = c++;1017// 3040..309F; Hiragana1018c = 0x3041; // from HIRAGANA LETTER SMALL A1019while (c <= 0x3096) // ..to HIRAGANA LETTER SMALL 
KE1020charset[i++] = c++;1021c = 0x3099; // from COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK1022while (c <= 0x309f) // ..to HIRAGANA DIGRAPH YORI1023charset[i++] = c++;1024// 30A0..30FF; Katakana1025c = 0x30a0; // from KATAKANA-HIRAGANA DOUBLE HYPHEN1026while (c <= 0x30ff) // ..to KATAKANA DIGRAPH KOTO1027charset[i++] = c++;1028// 3100..312F; Bopomofo1029c = 0x3105; // from BOPOMOFO LETTER B1030while (c <= 0x312f) // ..to BOPOMOFO LETTER NN1031charset[i++] = c++;1032// 3130..318F; Hangul Compatibility Jamo1033c = 0x3131; // from HANGUL LETTER KIYEOK1034while (c <= 0x318e) // ..to HANGUL LETTER ARAEAE1035charset[i++] = c++;1036// 3190..319F; Kanbun1037c = 0x3190; // from IDEOGRAPHIC ANNOTATION LINKING MARK1038while (c <= 0x319f) // ..to IDEOGRAPHIC ANNOTATION MAN MARK1039charset[i++] = c++;1040// 31A0..31BF; Bopomofo Extended1041c = 0x31a0; // from BOPOMOFO LETTER BU1042while (c <= 0x31bf) // ..to BOPOMOFO LETTER AH1043charset[i++] = c++;1044// 31C0..31EF; CJK Strokes1045c = 0x31c0; // from CJK STROKE T1046while (c <= 0x31e3) // ..to CJK STROKE Q1047charset[i++] = c++;1048// 31F0..31FF; Katakana Phonetic Extensions1049c = 0x31f0; // from KATAKANA LETTER SMALL KU1050while (c <= 0x31ff) // ..to KATAKANA LETTER SMALL RO1051charset[i++] = c++;1052// 3200..32FF; Enclosed CJK Letters and Months1053c = 0x3200; // from PARENTHESIZED HANGUL KIYEOK1054while (c <= 0x321e) // ..to PARENTHESIZED KOREAN CHARACTER O HU1055charset[i++] = c++;1056c = 0x3220; // from PARENTHESIZED IDEOGRAPH ONE1057while (c <= 0x32ff) // ..to SQUARE ERA NAME REIWA1058charset[i++] = c++;1059// 3300..33FF; CJK Compatibility1060c = 0x3300; // from SQUARE APAATO1061while (c <= 0x33ff) // ..to SQUARE GAL1062charset[i++] = c++;1063// 3400..4DBF; CJK Unified Ideographs Extension A1064c = 0x3400; // from <CJK Ideograph Extension A, First>1065while (c <= 0x4dbf) // ..to <CJK Ideograph Extension A, Last>1066charset[i++] = c++;1067// 4DC0..4DFF; Yijing Hexagram Symbols1068c = 0x4dc0; // from HEXAGRAM FOR THE 
CREATIVE HEAVEN1069while (c <= 0x4dff) // ..to HEXAGRAM FOR BEFORE COMPLETION1070charset[i++] = c++;1071// 4E00..9FFF; CJK Unified Ideographs1072c = 0x4e00; // from <CJK Ideograph, First>1073while (c <= 0x9ffc) // ..to <CJK Ideograph, Last>1074charset[i++] = c++;1075// A000..A48F; Yi Syllables1076c = 0xa000; // from YI SYLLABLE IT1077while (c <= 0xa48c) // ..to YI SYLLABLE YYR1078charset[i++] = c++;1079// A490..A4CF; Yi Radicals1080c = 0xa490; // from YI RADICAL QOT1081while (c <= 0xa4c6) // ..to YI RADICAL KE1082charset[i++] = c++;1083// A4D0..A4FF; Lisu1084c = 0xa4d0; // from LISU LETTER BA1085while (c <= 0xa4ff) // ..to LISU PUNCTUATION FULL STOP1086charset[i++] = c++;1087// A500..A63F; Vai1088c = 0xa500; // from VAI SYLLABLE EE1089while (c <= 0xa62b) // ..to VAI SYLLABLE NDOLE DO1090charset[i++] = c++;1091// A640..A69F; Cyrillic Extended-B1092c = 0xa640; // from CYRILLIC CAPITAL LETTER ZEMLYA1093while (c <= 0xa69f) // ..to COMBINING CYRILLIC LETTER IOTIFIED E1094charset[i++] = c++;1095// A6A0..A6FF; Bamum1096c = 0xa6a0; // from BAMUM LETTER A1097while (c <= 0xa6f7) // ..to BAMUM QUESTION MARK1098charset[i++] = c++;1099// A700..A71F; Modifier Tone Letters1100c = 0xa700; // from MODIFIER LETTER CHINESE TONE YIN PING1101while (c <= 0xa71f) // ..to MODIFIER LETTER LOW INVERTED EXCLAMATION MARK1102charset[i++] = c++;1103// A720..A7FF; Latin Extended-D1104c = 0xa720; // from MODIFIER LETTER STRESS AND HIGH TONE1105while (c <= 0xa7bf) // ..to LATIN SMALL LETTER GLOTTAL U1106charset[i++] = c++;1107c = 0xa7c2; // from LATIN CAPITAL LETTER ANGLICANA W1108while (c <= 0xa7ca) // ..to LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY1109charset[i++] = c++;1110c = 0xa7f5; // from LATIN CAPITAL LETTER REVERSED HALF H1111while (c <= 0xa7ff) // ..to LATIN EPIGRAPHIC LETTER ARCHAIC M1112charset[i++] = c++;1113// A800..A82F; Syloti Nagri1114c = 0xa800; // from SYLOTI NAGRI LETTER A1115while (c <= 0xa82c) // ..to SYLOTI NAGRI SIGN ALTERNATE HASANTA1116charset[i++] = c++;1117// 
A830..A83F; Common Indic Number Forms1118c = 0xa830; // from NORTH INDIC FRACTION ONE QUARTER1119while (c <= 0xa839) // ..to NORTH INDIC QUANTITY MARK1120charset[i++] = c++;1121// A840..A87F; Phags-pa1122c = 0xa840; // from PHAGS-PA LETTER KA1123while (c <= 0xa877) // ..to PHAGS-PA MARK DOUBLE SHAD1124charset[i++] = c++;1125// A880..A8DF; Saurashtra1126c = 0xa880; // from SAURASHTRA SIGN ANUSVARA1127while (c <= 0xa8c5) // ..to SAURASHTRA SIGN CANDRABINDU1128charset[i++] = c++;1129c = 0xa8ce; // from SAURASHTRA DANDA1130while (c <= 0xa8d9) // ..to SAURASHTRA DIGIT NINE1131charset[i++] = c++;1132// A8E0..A8FF; Devanagari Extended1133c = 0xa8e0; // from COMBINING DEVANAGARI DIGIT ZERO1134while (c <= 0xa8ff) // ..to DEVANAGARI VOWEL SIGN AY1135charset[i++] = c++;1136// A900..A92F; Kayah Li1137c = 0xa900; // from KAYAH LI DIGIT ZERO1138while (c <= 0xa92f) // ..to KAYAH LI SIGN SHYA1139charset[i++] = c++;1140// A930..A95F; Rejang1141c = 0xa930; // from REJANG LETTER KA1142while (c <= 0xa953) // ..to REJANG VIRAMA1143charset[i++] = c++;1144charset[i++] = 0xa95f; // REJANG SECTION MARK1145// A960..A97F; Hangul Jamo Extended-A1146c = 0xa960; // from HANGUL CHOSEONG TIKEUT-MIEUM1147while (c <= 0xa97c) // ..to HANGUL CHOSEONG SSANGYEORINHIEUH1148charset[i++] = c++;1149// A980..A9DF; Javanese1150c = 0xa980; // from JAVANESE SIGN PANYANGGA1151while (c <= 0xa9cd) // ..to JAVANESE TURNED PADA PISELEH1152charset[i++] = c++;1153c = 0xa9cf; // from JAVANESE PANGRANGKEP1154while (c <= 0xa9d9) // ..to JAVANESE DIGIT NINE1155charset[i++] = c++;1156charset[i++] = 0xa9de; // JAVANESE PADA TIRTA TUMETES1157charset[i++] = 0xa9df; // JAVANESE PADA ISEN-ISEN1158// A9E0..A9FF; Myanmar Extended-B1159c = 0xa9e0; // from MYANMAR LETTER SHAN GHA1160while (c <= 0xa9fe) // ..to MYANMAR LETTER TAI LAING BHA1161charset[i++] = c++;1162// AA00..AA5F; Cham1163c = 0xaa00; // from CHAM LETTER A1164while (c <= 0xaa36) // ..to CHAM CONSONANT SIGN WA1165charset[i++] = c++;1166c = 0xaa40; // from CHAM LETTER 
FINAL K1167while (c <= 0xaa4d) // ..to CHAM CONSONANT SIGN FINAL H1168charset[i++] = c++;1169c = 0xaa50; // from CHAM DIGIT ZERO1170while (c <= 0xaa59) // ..to CHAM DIGIT NINE1171charset[i++] = c++;1172c = 0xaa5c; // from CHAM PUNCTUATION SPIRAL1173while (c <= 0xaa5f) // ..to CHAM PUNCTUATION TRIPLE DANDA1174charset[i++] = c++;1175// AA60..AA7F; Myanmar Extended-A1176c = 0xaa60; // from MYANMAR LETTER KHAMTI GA1177while (c <= 0xaa7f) // ..to MYANMAR LETTER SHWE PALAUNG SHA1178charset[i++] = c++;1179// AA80..AADF; Tai Viet1180c = 0xaa80; // from TAI VIET LETTER LOW KO1181while (c <= 0xaac2) // ..to TAI VIET TONE MAI SONG1182charset[i++] = c++;1183c = 0xaadb; // from TAI VIET SYMBOL KON1184while (c <= 0xaadf) // ..to TAI VIET SYMBOL KOI KOI1185charset[i++] = c++;1186// AAE0..AAFF; Meetei Mayek Extensions1187c = 0xaae0; // from MEETEI MAYEK LETTER E1188while (c <= 0xaaf6) // ..to MEETEI MAYEK VIRAMA1189charset[i++] = c++;1190// AB00..AB2F; Ethiopic Extended-A1191c = 0xab01; // from ETHIOPIC SYLLABLE TTHU1192while (c <= 0xab06) // ..to ETHIOPIC SYLLABLE TTHO1193charset[i++] = c++;1194c = 0xab09; // from ETHIOPIC SYLLABLE DDHU1195while (c <= 0xab0e) // ..to ETHIOPIC SYLLABLE DDHO1196charset[i++] = c++;1197c = 0xab11; // from ETHIOPIC SYLLABLE DZU1198while (c <= 0xab16) // ..to ETHIOPIC SYLLABLE DZO1199charset[i++] = c++;1200c = 0xab20; // from ETHIOPIC SYLLABLE CCHHA1201while (c <= 0xab26) // ..to ETHIOPIC SYLLABLE CCHHO1202charset[i++] = c++;1203c = 0xab28; // from ETHIOPIC SYLLABLE BBA1204while (c <= 0xab2e) // ..to ETHIOPIC SYLLABLE BBO1205charset[i++] = c++;1206// AB30..AB6F; Latin Extended-E1207c = 0xab30; // from LATIN SMALL LETTER BARRED ALPHA1208while (c <= 0xab6b) // ..to MODIFIER LETTER RIGHT TACK1209charset[i++] = c++;1210// AB70..ABBF; Cherokee Supplement1211c = 0xab70; // from CHEROKEE SMALL LETTER A1212while (c <= 0xabbf) // ..to CHEROKEE SMALL LETTER YA1213charset[i++] = c++;1214// ABC0..ABFF; Meetei Mayek1215c = 0xabc0; // from MEETEI MAYEK LETTER 
KOK1216while (c <= 0xabed) // ..to MEETEI MAYEK APUN IYEK1217charset[i++] = c++;1218c = 0xabf0; // from MEETEI MAYEK DIGIT ZERO1219while (c <= 0xabf9) // ..to MEETEI MAYEK DIGIT NINE1220charset[i++] = c++;1221// AC00..D7AF; Hangul Syllables1222c = 0xac00; // from <Hangul Syllable, First>1223while (c <= 0xd7a3) // ..to <Hangul Syllable, Last>1224charset[i++] = c++;1225// D7B0..D7FF; Hangul Jamo Extended-B1226c = 0xd7b0; // from HANGUL JUNGSEONG O-YEO1227while (c <= 0xd7c6) // ..to HANGUL JUNGSEONG ARAEA-E1228charset[i++] = c++;1229c = 0xd7cb; // from HANGUL JONGSEONG NIEUN-RIEUL1230while (c <= 0xd7fb) // ..to HANGUL JONGSEONG PHIEUPH-THIEUTH1231charset[i++] = c++;1232// D800..DB7F; High Surrogates1233// DB80..DBFF; High Private Use Surrogates1234// DC00..DFFF; Low Surrogates1235// E000..F8FF; Private Use Area1236// F900..FAFF; CJK Compatibility Ideographs1237c = 0xf900; // from CJK COMPATIBILITY IDEOGRAPH-F9001238while (c <= 0xfa6d) // ..to CJK COMPATIBILITY IDEOGRAPH-FA6D1239charset[i++] = c++;1240c = 0xfa70; // from CJK COMPATIBILITY IDEOGRAPH-FA701241while (c <= 0xfad9) // ..to CJK COMPATIBILITY IDEOGRAPH-FAD91242charset[i++] = c++;1243// FB00..FB4F; Alphabetic Presentation Forms1244c = 0xfb00; // from LATIN SMALL LIGATURE FF1245while (c <= 0xfb06) // ..to LATIN SMALL LIGATURE ST1246charset[i++] = c++;1247c = 0xfb13; // from ARMENIAN SMALL LIGATURE MEN NOW1248while (c <= 0xfb17) // ..to ARMENIAN SMALL LIGATURE MEN XEH1249charset[i++] = c++;1250c = 0xfb1d; // from HEBREW LETTER YOD WITH HIRIQ1251while (c <= 0xfb36) // ..to HEBREW LETTER ZAYIN WITH DAGESH1252charset[i++] = c++;1253c = 0xfb38; // from HEBREW LETTER TET WITH DAGESH1254while (c <= 0xfb3c) // ..to HEBREW LETTER LAMED WITH DAGESH1255charset[i++] = c++;1256charset[i++] = 0xfb40; // HEBREW LETTER NUN WITH DAGESH1257charset[i++] = 0xfb41; // HEBREW LETTER SAMEKH WITH DAGESH1258charset[i++] = 0xfb43; // HEBREW LETTER FINAL PE WITH DAGESH1259charset[i++] = 0xfb44; // HEBREW LETTER PE WITH DAGESH1260c = 
0xfb46; // from HEBREW LETTER TSADI WITH DAGESH1261while (c <= 0xfb4f) // ..to HEBREW LIGATURE ALEF LAMED1262charset[i++] = c++;1263// FB50..FDFF; Arabic Presentation Forms-A1264c = 0xfb50; // from ARABIC LETTER ALEF WASLA ISOLATED FORM1265while (c <= 0xfbc1) // ..to ARABIC SYMBOL SMALL TAH BELOW1266charset[i++] = c++;1267c = 0xfbd3; // from ARABIC LETTER NG ISOLATED FORM1268while (c <= 0xfd3f) // ..to ORNATE RIGHT PARENTHESIS1269charset[i++] = c++;1270c = 0xfd50; // from ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM1271while (c <= 0xfd8f) // ..to ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM1272charset[i++] = c++;1273c = 0xfd92; // from ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM1274while (c <= 0xfdc7) // ..to ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM1275charset[i++] = c++;1276c = 0xfdf0; // from ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM1277while (c <= 0xfdfd) // ..to ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM1278charset[i++] = c++;1279// FE00..FE0F; Variation Selectors1280c = 0xfe00; // from VARIATION SELECTOR-11281while (c <= 0xfe0f) // ..to VARIATION SELECTOR-161282charset[i++] = c++;1283// FE10..FE1F; Vertical Forms1284c = 0xfe10; // from PRESENTATION FORM FOR VERTICAL COMMA1285while (c <= 0xfe19) // ..to PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS1286charset[i++] = c++;1287// FE20..FE2F; Combining Half Marks1288c = 0xfe20; // from COMBINING LIGATURE LEFT HALF1289while (c <= 0xfe2f) // ..to COMBINING CYRILLIC TITLO RIGHT HALF1290charset[i++] = c++;1291// FE30..FE4F; CJK Compatibility Forms1292c = 0xfe30; // from PRESENTATION FORM FOR VERTICAL TWO DOT LEADER1293while (c <= 0xfe4f) // ..to WAVY LOW LINE1294charset[i++] = c++;1295// FE50..FE6F; Small Form Variants1296charset[i++] = 0xfe50; // SMALL COMMA1297charset[i++] = 0xfe52; // SMALL FULL STOP1298c = 0xfe54; // from SMALL SEMICOLON1299while (c <= 0xfe66) // ..to SMALL EQUALS SIGN1300charset[i++] = c++;1301c = 0xfe68; // from SMALL REVERSE 
SOLIDUS1302while (c <= 0xfe6b) // ..to SMALL COMMERCIAL AT1303charset[i++] = c++;1304// FE70..FEFF; Arabic Presentation Forms-B1305c = 0xfe70; // from ARABIC FATHATAN ISOLATED FORM1306while (c <= 0xfe74) // ..to ARABIC KASRATAN ISOLATED FORM1307charset[i++] = c++;1308c = 0xfe76; // from ARABIC FATHA ISOLATED FORM1309while (c <= 0xfefc) // ..to ARABIC LIGATURE LAM WITH ALEF FINAL FORM1310charset[i++] = c++;1311charset[i++] = 0xfeff; // ZERO WIDTH NO-BREAK SPACE1312// FF00..FFEF; Halfwidth and Fullwidth Forms1313c = 0xff01; // from FULLWIDTH EXCLAMATION MARK1314while (c <= 0xffbe) // ..to HALFWIDTH HANGUL LETTER HIEUH1315charset[i++] = c++;1316c = 0xffc2; // from HALFWIDTH HANGUL LETTER A1317while (c <= 0xffc7) // ..to HALFWIDTH HANGUL LETTER E1318charset[i++] = c++;1319c = 0xffca; // from HALFWIDTH HANGUL LETTER YEO1320while (c <= 0xffcf) // ..to HALFWIDTH HANGUL LETTER OE1321charset[i++] = c++;1322c = 0xffd2; // from HALFWIDTH HANGUL LETTER YO1323while (c <= 0xffd7) // ..to HALFWIDTH HANGUL LETTER YU1324charset[i++] = c++;1325charset[i++] = 0xffda; // HALFWIDTH HANGUL LETTER EU1326charset[i++] = 0xffdc; // HALFWIDTH HANGUL LETTER I1327c = 0xffe0; // from FULLWIDTH CENT SIGN1328while (c <= 0xffe6) // ..to FULLWIDTH WON SIGN1329charset[i++] = c++;1330c = 0xffe8; // from HALFWIDTH FORMS LIGHT VERTICAL1331while (c <= 0xffee) // ..to HALFWIDTH WHITE CIRCLE1332charset[i++] = c++;1333// FFF0..FFFF; Specials1334c = 0xfff9; // from INTERLINEAR ANNOTATION ANCHOR1335while (c <= 0xfffd) // ..to REPLACEMENT CHARACTER1336charset[i++] = c++;1337// 10000..1007F; Linear B Syllabary1338c = 0x10000; // from LINEAR B SYLLABLE B008 A1339while (c <= 0x1000b) // ..to LINEAR B SYLLABLE B046 JE1340charset[i++] = c++;1341c = 0x1000d; // from LINEAR B SYLLABLE B036 JO1342while (c <= 0x10026) // ..to LINEAR B SYLLABLE B032 QO1343charset[i++] = c++;1344c = 0x10028; // from LINEAR B SYLLABLE B060 RA1345while (c <= 0x1003a) // ..to LINEAR B SYLLABLE B042 WO1346charset[i++] = c++;1347charset[i++] 
= 0x1003c; // LINEAR B SYLLABLE B017 ZA1348charset[i++] = 0x1003d; // LINEAR B SYLLABLE B074 ZE1349c = 0x1003f; // from LINEAR B SYLLABLE B020 ZO1350while (c <= 0x1004d) // ..to LINEAR B SYLLABLE B091 TWO1351charset[i++] = c++;1352c = 0x10050; // from LINEAR B SYMBOL B0181353while (c <= 0x1005d) // ..to LINEAR B SYMBOL B0891354charset[i++] = c++;1355// 10080..100FF; Linear B Ideograms1356c = 0x10080; // from LINEAR B IDEOGRAM B100 MAN1357while (c <= 0x100fa) // ..to LINEAR B IDEOGRAM VESSEL B3051358charset[i++] = c++;1359// 10100..1013F; Aegean Numbers1360charset[i++] = 0x10100; // AEGEAN WORD SEPARATOR LINE1361charset[i++] = 0x10102; // AEGEAN CHECK MARK1362c = 0x10107; // from AEGEAN NUMBER ONE1363while (c <= 0x10133) // ..to AEGEAN NUMBER NINETY THOUSAND1364charset[i++] = c++;1365c = 0x10137; // from AEGEAN WEIGHT BASE UNIT1366while (c <= 0x1013f) // ..to AEGEAN MEASURE THIRD SUBUNIT1367charset[i++] = c++;1368// 10140..1018F; Ancient Greek Numbers1369c = 0x10140; // from GREEK ACROPHONIC ATTIC ONE QUARTER1370while (c <= 0x1018e) // ..to NOMISMA SIGN1371charset[i++] = c++;1372// 10190..101CF; Ancient Symbols1373c = 0x10190; // from ROMAN SEXTANS SIGN1374while (c <= 0x1019c) // ..to ASCIA SYMBOL1375charset[i++] = c++;1376charset[i++] = 0x101a0; // GREEK SYMBOL TAU RHO1377// 101D0..101FF; Phaistos Disc1378c = 0x101d0; // from PHAISTOS DISC SIGN PEDESTRIAN1379while (c <= 0x101fd) // ..to PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE1380charset[i++] = c++;1381// 10280..1029F; Lycian1382c = 0x10280; // from LYCIAN LETTER A1383while (c <= 0x1029c) // ..to LYCIAN LETTER X1384charset[i++] = c++;1385// 102A0..102DF; Carian1386c = 0x102a0; // from CARIAN LETTER A1387while (c <= 0x102d0) // ..to CARIAN LETTER UUU31388charset[i++] = c++;1389// 102E0..102FF; Coptic Epact Numbers1390c = 0x102e0; // from COPTIC EPACT THOUSANDS MARK1391while (c <= 0x102fb) // ..to COPTIC EPACT NUMBER NINE HUNDRED1392charset[i++] = c++;1393// 10300..1032F; Old Italic1394c = 0x10300; // from OLD 
ITALIC LETTER A1395while (c <= 0x10323) // ..to OLD ITALIC NUMERAL FIFTY1396charset[i++] = c++;1397charset[i++] = 0x1032d; // OLD ITALIC LETTER YE1398charset[i++] = 0x1032f; // OLD ITALIC LETTER SOUTHERN TSE1399// 10330..1034F; Gothic1400c = 0x10330; // from GOTHIC LETTER AHSA1401while (c <= 0x1034a) // ..to GOTHIC LETTER NINE HUNDRED1402charset[i++] = c++;1403// 10350..1037F; Old Permic1404c = 0x10350; // from OLD PERMIC LETTER AN1405while (c <= 0x1037a) // ..to COMBINING OLD PERMIC LETTER SII1406charset[i++] = c++;1407// 10380..1039F; Ugaritic1408c = 0x10380; // from UGARITIC LETTER ALPA1409while (c <= 0x1039d) // ..to UGARITIC LETTER SSU1410charset[i++] = c++;1411charset[i++] = 0x1039f; // UGARITIC WORD DIVIDER1412// 103A0..103DF; Old Persian1413c = 0x103a0; // from OLD PERSIAN SIGN A1414while (c <= 0x103c3) // ..to OLD PERSIAN SIGN HA1415charset[i++] = c++;1416c = 0x103c8; // from OLD PERSIAN SIGN AURAMAZDAA1417while (c <= 0x103d5) // ..to OLD PERSIAN NUMBER HUNDRED1418charset[i++] = c++;1419// 10400..1044F; Deseret1420c = 0x10400; // from DESERET CAPITAL LETTER LONG I1421while (c <= 0x1044f) // ..to DESERET SMALL LETTER EW1422charset[i++] = c++;1423// 10450..1047F; Shavian1424c = 0x10450; // from SHAVIAN LETTER PEEP1425while (c <= 0x1047f) // ..to SHAVIAN LETTER YEW1426charset[i++] = c++;1427// 10480..104AF; Osmanya1428c = 0x10480; // from OSMANYA LETTER ALEF1429while (c <= 0x1049d) // ..to OSMANYA LETTER OO1430charset[i++] = c++;1431c = 0x104a0; // from OSMANYA DIGIT ZERO1432while (c <= 0x104a9) // ..to OSMANYA DIGIT NINE1433charset[i++] = c++;1434// 104B0..104FF; Osage1435c = 0x104b0; // from OSAGE CAPITAL LETTER A1436while (c <= 0x104d3) // ..to OSAGE CAPITAL LETTER ZHA1437charset[i++] = c++;1438c = 0x104d8; // from OSAGE SMALL LETTER A1439while (c <= 0x104fb) // ..to OSAGE SMALL LETTER ZHA1440charset[i++] = c++;1441// 10500..1052F; Elbasan1442c = 0x10500; // from ELBASAN LETTER A1443while (c <= 0x10527) // ..to ELBASAN LETTER KHE1444charset[i++] = 
c++;1445// 10530..1056F; Caucasian Albanian1446c = 0x10530; // from CAUCASIAN ALBANIAN LETTER ALT1447while (c <= 0x10563) // ..to CAUCASIAN ALBANIAN LETTER KIW1448charset[i++] = c++;1449charset[i++] = 0x1056f; // CAUCASIAN ALBANIAN CITATION MARK1450// 10600..1077F; Linear A1451c = 0x10600; // from LINEAR A SIGN AB0011452while (c <= 0x10736) // ..to LINEAR A SIGN A6641453charset[i++] = c++;1454c = 0x10740; // from LINEAR A SIGN A701 A1455while (c <= 0x10755) // ..to LINEAR A SIGN A732 JE1456charset[i++] = c++;1457c = 0x10760; // from LINEAR A SIGN A8001458while (c <= 0x10767) // ..to LINEAR A SIGN A8071459charset[i++] = c++;1460// 10800..1083F; Cypriot Syllabary1461c = 0x10800; // from CYPRIOT SYLLABLE A1462while (c <= 0x10805) // ..to CYPRIOT SYLLABLE JA1463charset[i++] = c++;1464c = 0x1080a; // from CYPRIOT SYLLABLE KA1465while (c <= 0x10835) // ..to CYPRIOT SYLLABLE WO1466charset[i++] = c++;1467charset[i++] = 0x10837; // CYPRIOT SYLLABLE XA1468charset[i++] = 0x10838; // CYPRIOT SYLLABLE XE1469c = 0x1083c; // from CYPRIOT SYLLABLE ZA1470while (c <= 0x1083f) // ..to CYPRIOT SYLLABLE ZO1471charset[i++] = c++;1472// 10840..1085F; Imperial Aramaic1473c = 0x10840; // from IMPERIAL ARAMAIC LETTER ALEPH1474while (c <= 0x10855) // ..to IMPERIAL ARAMAIC LETTER TAW1475charset[i++] = c++;1476c = 0x10857; // from IMPERIAL ARAMAIC SECTION SIGN1477while (c <= 0x1085f) // ..to IMPERIAL ARAMAIC NUMBER TEN THOUSAND1478charset[i++] = c++;1479// 10860..1087F; Palmyrene1480c = 0x10860; // from PALMYRENE LETTER ALEPH1481while (c <= 0x1087f) // ..to PALMYRENE NUMBER TWENTY1482charset[i++] = c++;1483// 10880..108AF; Nabataean1484c = 0x10880; // from NABATAEAN LETTER FINAL ALEPH1485while (c <= 0x1089e) // ..to NABATAEAN LETTER TAW1486charset[i++] = c++;1487c = 0x108a7; // from NABATAEAN NUMBER ONE1488while (c <= 0x108af) // ..to NABATAEAN NUMBER ONE HUNDRED1489charset[i++] = c++;1490// 108E0..108FF; Hatran1491c = 0x108e0; // from HATRAN LETTER ALEPH1492while (c <= 0x108f2) // ..to HATRAN 
LETTER QOPH1493charset[i++] = c++;1494charset[i++] = 0x108f4; // HATRAN LETTER SHIN1495charset[i++] = 0x108f5; // HATRAN LETTER TAW1496c = 0x108fb; // from HATRAN NUMBER ONE1497while (c <= 0x108ff) // ..to HATRAN NUMBER ONE HUNDRED1498charset[i++] = c++;1499// 10900..1091F; Phoenician1500c = 0x10900; // from PHOENICIAN LETTER ALF1501while (c <= 0x1091b) // ..to PHOENICIAN NUMBER THREE1502charset[i++] = c++;1503charset[i++] = 0x1091f; // PHOENICIAN WORD SEPARATOR1504// 10920..1093F; Lydian1505c = 0x10920; // from LYDIAN LETTER A1506while (c <= 0x10939) // ..to LYDIAN LETTER C1507charset[i++] = c++;1508charset[i++] = 0x1093f; // LYDIAN TRIANGULAR MARK1509// 10980..1099F; Meroitic Hieroglyphs1510c = 0x10980; // from MEROITIC HIEROGLYPHIC LETTER A1511while (c <= 0x1099f) // ..to MEROITIC HIEROGLYPHIC SYMBOL VIDJ-21512charset[i++] = c++;1513// 109A0..109FF; Meroitic Cursive1514c = 0x109a0; // from MEROITIC CURSIVE LETTER A1515while (c <= 0x109b7) // ..to MEROITIC CURSIVE LETTER DA1516charset[i++] = c++;1517c = 0x109bc; // from MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS1518while (c <= 0x109cf) // ..to MEROITIC CURSIVE NUMBER SEVENTY1519charset[i++] = c++;1520c = 0x109d2; // from MEROITIC CURSIVE NUMBER ONE HUNDRED1521while (c <= 0x109ff) // ..to MEROITIC CURSIVE FRACTION TEN TWELFTHS1522charset[i++] = c++;1523// 10A00..10A5F; Kharoshthi1524c = 0x10a00; // from KHAROSHTHI LETTER A1525while (c <= 0x10a03) // ..to KHAROSHTHI VOWEL SIGN VOCALIC R1526charset[i++] = c++;1527charset[i++] = 0x10a05; // KHAROSHTHI VOWEL SIGN E1528charset[i++] = 0x10a06; // KHAROSHTHI VOWEL SIGN O1529c = 0x10a0c; // from KHAROSHTHI VOWEL LENGTH MARK1530while (c <= 0x10a13) // ..to KHAROSHTHI LETTER GHA1531charset[i++] = c++;1532charset[i++] = 0x10a15; // KHAROSHTHI LETTER CA1533charset[i++] = 0x10a17; // KHAROSHTHI LETTER JA1534c = 0x10a19; // from KHAROSHTHI LETTER NYA1535while (c <= 0x10a35) // ..to KHAROSHTHI LETTER VHA1536charset[i++] = c++;1537charset[i++] = 0x10a38; // KHAROSHTHI SIGN BAR 
ABOVE1538charset[i++] = 0x10a3a; // KHAROSHTHI SIGN DOT BELOW1539c = 0x10a3f; // from KHAROSHTHI VIRAMA1540while (c <= 0x10a48) // ..to KHAROSHTHI FRACTION ONE HALF1541charset[i++] = c++;1542c = 0x10a50; // from KHAROSHTHI PUNCTUATION DOT1543while (c <= 0x10a58) // ..to KHAROSHTHI PUNCTUATION LINES1544charset[i++] = c++;1545// 10A60..10A7F; Old South Arabian1546c = 0x10a60; // from OLD SOUTH ARABIAN LETTER HE1547while (c <= 0x10a7f) // ..to OLD SOUTH ARABIAN NUMERIC INDICATOR1548charset[i++] = c++;1549// 10A80..10A9F; Old North Arabian1550c = 0x10a80; // from OLD NORTH ARABIAN LETTER HEH1551while (c <= 0x10a9f) // ..to OLD NORTH ARABIAN NUMBER TWENTY1552charset[i++] = c++;1553// 10AC0..10AFF; Manichaean1554c = 0x10ac0; // from MANICHAEAN LETTER ALEPH1555while (c <= 0x10ae6) // ..to MANICHAEAN ABBREVIATION MARK BELOW1556charset[i++] = c++;1557c = 0x10aeb; // from MANICHAEAN NUMBER ONE1558while (c <= 0x10af6) // ..to MANICHAEAN PUNCTUATION LINE FILLER1559charset[i++] = c++;1560// 10B00..10B3F; Avestan1561c = 0x10b00; // from AVESTAN LETTER A1562while (c <= 0x10b35) // ..to AVESTAN LETTER HE1563charset[i++] = c++;1564c = 0x10b39; // from AVESTAN ABBREVIATION MARK1565while (c <= 0x10b3f) // ..to LARGE ONE RING OVER TWO RINGS PUNCTUATION1566charset[i++] = c++;1567// 10B40..10B5F; Inscriptional Parthian1568c = 0x10b40; // from INSCRIPTIONAL PARTHIAN LETTER ALEPH1569while (c <= 0x10b55) // ..to INSCRIPTIONAL PARTHIAN LETTER TAW1570charset[i++] = c++;1571c = 0x10b58; // from INSCRIPTIONAL PARTHIAN NUMBER ONE1572while (c <= 0x10b5f) // ..to INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND1573charset[i++] = c++;1574// 10B60..10B7F; Inscriptional Pahlavi1575c = 0x10b60; // from INSCRIPTIONAL PAHLAVI LETTER ALEPH1576while (c <= 0x10b72) // ..to INSCRIPTIONAL PAHLAVI LETTER TAW1577charset[i++] = c++;1578c = 0x10b78; // from INSCRIPTIONAL PAHLAVI NUMBER ONE1579while (c <= 0x10b7f) // ..to INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND1580charset[i++] = c++;1581// 10B80..10BAF; Psalter 
Pahlavi1582c = 0x10b80; // from PSALTER PAHLAVI LETTER ALEPH1583while (c <= 0x10b91) // ..to PSALTER PAHLAVI LETTER TAW1584charset[i++] = c++;1585c = 0x10b99; // from PSALTER PAHLAVI SECTION MARK1586while (c <= 0x10b9c) // ..to PSALTER PAHLAVI FOUR DOTS WITH DOT1587charset[i++] = c++;1588c = 0x10ba9; // from PSALTER PAHLAVI NUMBER ONE1589while (c <= 0x10baf) // ..to PSALTER PAHLAVI NUMBER ONE HUNDRED1590charset[i++] = c++;1591// 10C00..10C4F; Old Turkic1592c = 0x10c00; // from OLD TURKIC LETTER ORKHON A1593while (c <= 0x10c48) // ..to OLD TURKIC LETTER ORKHON BASH1594charset[i++] = c++;1595// 10C80..10CFF; Old Hungarian1596c = 0x10c80; // from OLD HUNGARIAN CAPITAL LETTER A1597while (c <= 0x10cb2) // ..to OLD HUNGARIAN CAPITAL LETTER US1598charset[i++] = c++;1599c = 0x10cc0; // from OLD HUNGARIAN SMALL LETTER A1600while (c <= 0x10cf2) // ..to OLD HUNGARIAN SMALL LETTER US1601charset[i++] = c++;1602c = 0x10cfa; // from OLD HUNGARIAN NUMBER ONE1603while (c <= 0x10cff) // ..to OLD HUNGARIAN NUMBER ONE THOUSAND1604charset[i++] = c++;1605// 10D00..10D3F; Hanifi Rohingya1606c = 0x10d00; // from HANIFI ROHINGYA LETTER A1607while (c <= 0x10d27) // ..to HANIFI ROHINGYA SIGN TASSI1608charset[i++] = c++;1609c = 0x10d30; // from HANIFI ROHINGYA DIGIT ZERO1610while (c <= 0x10d39) // ..to HANIFI ROHINGYA DIGIT NINE1611charset[i++] = c++;1612// 10E60..10E7F; Rumi Numeral Symbols1613c = 0x10e60; // from RUMI DIGIT ONE1614while (c <= 0x10e7e) // ..to RUMI FRACTION TWO THIRDS1615charset[i++] = c++;1616// 10E80..10EBF; Yezidi1617c = 0x10e80; // from YEZIDI LETTER ELIF1618while (c <= 0x10ea9) // ..to YEZIDI LETTER ET1619charset[i++] = c++;1620charset[i++] = 0x10eab; // YEZIDI COMBINING HAMZA MARK1621charset[i++] = 0x10ead; // YEZIDI HYPHENATION MARK1622charset[i++] = 0x10eb0; // YEZIDI LETTER LAM WITH DOT ABOVE1623charset[i++] = 0x10eb1; // YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE1624// 10F00..10F2F; Old Sogdian1625c = 0x10f00; // from OLD SOGDIAN LETTER ALEPH1626while (c <= 0x10f27) 
// ..to OLD SOGDIAN LIGATURE AYIN-DALETH1627charset[i++] = c++;1628// 10F30..10F6F; Sogdian1629c = 0x10f30; // from SOGDIAN LETTER ALEPH1630while (c <= 0x10f59) // ..to SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT1631charset[i++] = c++;1632// 10FB0..10FDF; Chorasmian1633c = 0x10fb0; // from CHORASMIAN LETTER ALEPH1634while (c <= 0x10fcb) // ..to CHORASMIAN NUMBER ONE HUNDRED1635charset[i++] = c++;1636// 10FE0..10FFF; Elymaic1637c = 0x10fe0; // from ELYMAIC LETTER ALEPH1638while (c <= 0x10ff6) // ..to ELYMAIC LIGATURE ZAYIN-YODH1639charset[i++] = c++;1640// 11000..1107F; Brahmi1641c = 0x11000; // from BRAHMI SIGN CANDRABINDU1642while (c <= 0x1104d) // ..to BRAHMI PUNCTUATION LOTUS1643charset[i++] = c++;1644c = 0x11052; // from BRAHMI NUMBER ONE1645while (c <= 0x1106f) // ..to BRAHMI DIGIT NINE1646charset[i++] = c++;1647charset[i++] = 0x1107f; // BRAHMI NUMBER JOINER1648// 11080..110CF; Kaithi1649c = 0x11080; // from KAITHI SIGN CANDRABINDU1650while (c <= 0x110c1) // ..to KAITHI DOUBLE DANDA1651charset[i++] = c++;1652charset[i++] = 0x110cd; // KAITHI NUMBER SIGN ABOVE1653// 110D0..110FF; Sora Sompeng1654c = 0x110d0; // from SORA SOMPENG LETTER SAH1655while (c <= 0x110e8) // ..to SORA SOMPENG LETTER MAE1656charset[i++] = c++;1657c = 0x110f0; // from SORA SOMPENG DIGIT ZERO1658while (c <= 0x110f9) // ..to SORA SOMPENG DIGIT NINE1659charset[i++] = c++;1660// 11100..1114F; Chakma1661c = 0x11100; // from CHAKMA SIGN CANDRABINDU1662while (c <= 0x11134) // ..to CHAKMA MAAYYAA1663charset[i++] = c++;1664c = 0x11136; // from CHAKMA DIGIT ZERO1665while (c <= 0x11147) // ..to CHAKMA LETTER VAA1666charset[i++] = c++;1667// 11150..1117F; Mahajani1668c = 0x11150; // from MAHAJANI LETTER A1669while (c <= 0x11176) // ..to MAHAJANI LIGATURE SHRI1670charset[i++] = c++;1671// 11180..111DF; Sharada1672c = 0x11180; // from SHARADA SIGN CANDRABINDU1673while (c <= 0x111df) // ..to SHARADA SECTION MARK-21674charset[i++] = c++;1675// 111E0..111FF; Sinhala Archaic Numbers1676c = 0x111e1; // from 
SINHALA ARCHAIC DIGIT ONE1677while (c <= 0x111f4) // ..to SINHALA ARCHAIC NUMBER ONE THOUSAND1678charset[i++] = c++;1679// 11200..1124F; Khojki1680c = 0x11200; // from KHOJKI LETTER A1681while (c <= 0x11211) // ..to KHOJKI LETTER JJA1682charset[i++] = c++;1683c = 0x11213; // from KHOJKI LETTER NYA1684while (c <= 0x1123e) // ..to KHOJKI SIGN SUKUN1685charset[i++] = c++;1686// 11280..112AF; Multani1687c = 0x11280; // from MULTANI LETTER A1688while (c <= 0x11286) // ..to MULTANI LETTER GA1689charset[i++] = c++;1690c = 0x1128a; // from MULTANI LETTER CA1691while (c <= 0x1128d) // ..to MULTANI LETTER JJA1692charset[i++] = c++;1693c = 0x1128f; // from MULTANI LETTER NYA1694while (c <= 0x1129d) // ..to MULTANI LETTER BA1695charset[i++] = c++;1696c = 0x1129f; // from MULTANI LETTER BHA1697while (c <= 0x112a9) // ..to MULTANI SECTION MARK1698charset[i++] = c++;1699// 112B0..112FF; Khudawadi1700c = 0x112b0; // from KHUDAWADI LETTER A1701while (c <= 0x112ea) // ..to KHUDAWADI SIGN VIRAMA1702charset[i++] = c++;1703c = 0x112f0; // from KHUDAWADI DIGIT ZERO1704while (c <= 0x112f9) // ..to KHUDAWADI DIGIT NINE1705charset[i++] = c++;1706// 11300..1137F; Grantha1707c = 0x11300; // from GRANTHA SIGN COMBINING ANUSVARA ABOVE1708while (c <= 0x11303) // ..to GRANTHA SIGN VISARGA1709charset[i++] = c++;1710c = 0x11305; // from GRANTHA LETTER A1711while (c <= 0x1130c) // ..to GRANTHA LETTER VOCALIC L1712charset[i++] = c++;1713charset[i++] = 0x1130f; // GRANTHA LETTER EE1714charset[i++] = 0x11310; // GRANTHA LETTER AI1715c = 0x11313; // from GRANTHA LETTER OO1716while (c <= 0x11328) // ..to GRANTHA LETTER NA1717charset[i++] = c++;1718c = 0x1132a; // from GRANTHA LETTER PA1719while (c <= 0x11330) // ..to GRANTHA LETTER RA1720charset[i++] = c++;1721charset[i++] = 0x11332; // GRANTHA LETTER LA1722charset[i++] = 0x11333; // GRANTHA LETTER LLA1723c = 0x11335; // from GRANTHA LETTER VA1724while (c <= 0x11339) // ..to GRANTHA LETTER HA1725charset[i++] = c++;1726c = 0x1133b; // from COMBINING 
BINDU BELOW1727while (c <= 0x11344) // ..to GRANTHA VOWEL SIGN VOCALIC RR1728charset[i++] = c++;1729charset[i++] = 0x11347; // GRANTHA VOWEL SIGN EE1730charset[i++] = 0x11348; // GRANTHA VOWEL SIGN AI1731charset[i++] = 0x1134b; // GRANTHA VOWEL SIGN OO1732charset[i++] = 0x1134d; // GRANTHA SIGN VIRAMA1733c = 0x1135d; // from GRANTHA SIGN PLUTA1734while (c <= 0x11363) // ..to GRANTHA VOWEL SIGN VOCALIC LL1735charset[i++] = c++;1736c = 0x11366; // from COMBINING GRANTHA DIGIT ZERO1737while (c <= 0x1136c) // ..to COMBINING GRANTHA DIGIT SIX1738charset[i++] = c++;1739c = 0x11370; // from COMBINING GRANTHA LETTER A1740while (c <= 0x11374) // ..to COMBINING GRANTHA LETTER PA1741charset[i++] = c++;1742// 11400..1147F; Newa1743c = 0x11400; // from NEWA LETTER A1744while (c <= 0x1145b) // ..to NEWA PLACEHOLDER MARK1745charset[i++] = c++;1746c = 0x1145d; // from NEWA INSERTION SIGN1747while (c <= 0x11461) // ..to NEWA SIGN UPADHMANIYA1748charset[i++] = c++;1749// 11480..114DF; Tirhuta1750c = 0x11480; // from TIRHUTA ANJI1751while (c <= 0x114c7) // ..to TIRHUTA OM1752charset[i++] = c++;1753c = 0x114d0; // from TIRHUTA DIGIT ZERO1754while (c <= 0x114d9) // ..to TIRHUTA DIGIT NINE1755charset[i++] = c++;1756// 11580..115FF; Siddham1757c = 0x11580; // from SIDDHAM LETTER A1758while (c <= 0x115b5) // ..to SIDDHAM VOWEL SIGN VOCALIC RR1759charset[i++] = c++;1760c = 0x115b8; // from SIDDHAM VOWEL SIGN E1761while (c <= 0x115dd) // ..to SIDDHAM VOWEL SIGN ALTERNATE UU1762charset[i++] = c++;1763// 11600..1165F; Modi1764c = 0x11600; // from MODI LETTER A1765while (c <= 0x11644) // ..to MODI SIGN HUVA1766charset[i++] = c++;1767c = 0x11650; // from MODI DIGIT ZERO1768while (c <= 0x11659) // ..to MODI DIGIT NINE1769charset[i++] = c++;1770// 11660..1167F; Mongolian Supplement1771c = 0x11660; // from MONGOLIAN BIRGA WITH ORNAMENT1772while (c <= 0x1166c) // ..to MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT1773charset[i++] = c++;1774// 11680..116CF; Takri1775c = 0x11680; // from TAKRI 
LETTER A1776while (c <= 0x116b8) // ..to TAKRI LETTER ARCHAIC KHA1777charset[i++] = c++;1778c = 0x116c0; // from TAKRI DIGIT ZERO1779while (c <= 0x116c9) // ..to TAKRI DIGIT NINE1780charset[i++] = c++;1781// 11700..1173F; Ahom1782c = 0x11700; // from AHOM LETTER KA1783while (c <= 0x1171a) // ..to AHOM LETTER ALTERNATE BA1784charset[i++] = c++;1785c = 0x1171d; // from AHOM CONSONANT SIGN MEDIAL LA1786while (c <= 0x1172b) // ..to AHOM SIGN KILLER1787charset[i++] = c++;1788c = 0x11730; // from AHOM DIGIT ZERO1789while (c <= 0x1173f) // ..to AHOM SYMBOL VI1790charset[i++] = c++;1791// 11800..1184F; Dogra1792c = 0x11800; // from DOGRA LETTER A1793while (c <= 0x1183b) // ..to DOGRA ABBREVIATION SIGN1794charset[i++] = c++;1795// 118A0..118FF; Warang Citi1796c = 0x118a0; // from WARANG CITI CAPITAL LETTER NGAA1797while (c <= 0x118f2) // ..to WARANG CITI NUMBER NINETY1798charset[i++] = c++;1799charset[i++] = 0x118ff; // WARANG CITI OM1800// 11900..1195F; Dives Akuru1801c = 0x11900; // from DIVES AKURU LETTER A1802while (c <= 0x11906) // ..to DIVES AKURU LETTER E1803charset[i++] = c++;1804c = 0x1190c; // from DIVES AKURU LETTER KA1805while (c <= 0x11913) // ..to DIVES AKURU LETTER JA1806charset[i++] = c++;1807charset[i++] = 0x11915; // DIVES AKURU LETTER NYA1808charset[i++] = 0x11916; // DIVES AKURU LETTER TTA1809c = 0x11918; // from DIVES AKURU LETTER DDA1810while (c <= 0x11935) // ..to DIVES AKURU VOWEL SIGN E1811charset[i++] = c++;1812charset[i++] = 0x11937; // DIVES AKURU VOWEL SIGN AI1813charset[i++] = 0x11938; // DIVES AKURU VOWEL SIGN O1814c = 0x1193b; // from DIVES AKURU SIGN ANUSVARA1815while (c <= 0x11946) // ..to DIVES AKURU END OF TEXT MARK1816charset[i++] = c++;1817c = 0x11950; // from DIVES AKURU DIGIT ZERO1818while (c <= 0x11959) // ..to DIVES AKURU DIGIT NINE1819charset[i++] = c++;1820// 119A0..119FF; Nandinagari1821c = 0x119a0; // from NANDINAGARI LETTER A1822while (c <= 0x119a7) // ..to NANDINAGARI LETTER VOCALIC RR1823charset[i++] = c++;1824c = 0x119aa; // 
from NANDINAGARI LETTER E1825while (c <= 0x119d7) // ..to NANDINAGARI VOWEL SIGN VOCALIC RR1826charset[i++] = c++;1827c = 0x119da; // from NANDINAGARI VOWEL SIGN E1828while (c <= 0x119e4) // ..to NANDINAGARI VOWEL SIGN PRISHTHAMATRA E1829charset[i++] = c++;1830// 11A00..11A4F; Zanabazar Square1831c = 0x11a00; // from ZANABAZAR SQUARE LETTER A1832while (c <= 0x11a47) // ..to ZANABAZAR SQUARE SUBJOINER1833charset[i++] = c++;1834// 11A50..11AAF; Soyombo1835c = 0x11a50; // from SOYOMBO LETTER A1836while (c <= 0x11aa2) // ..to SOYOMBO TERMINAL MARK-21837charset[i++] = c++;1838// 11AC0..11AFF; Pau Cin Hau1839c = 0x11ac0; // from PAU CIN HAU LETTER PA1840while (c <= 0x11af8) // ..to PAU CIN HAU GLOTTAL STOP FINAL1841charset[i++] = c++;1842// 11C00..11C6F; Bhaiksuki1843c = 0x11c00; // from BHAIKSUKI LETTER A1844while (c <= 0x11c08) // ..to BHAIKSUKI LETTER VOCALIC L1845charset[i++] = c++;1846c = 0x11c0a; // from BHAIKSUKI LETTER E1847while (c <= 0x11c36) // ..to BHAIKSUKI VOWEL SIGN VOCALIC L1848charset[i++] = c++;1849c = 0x11c38; // from BHAIKSUKI VOWEL SIGN E1850while (c <= 0x11c45) // ..to BHAIKSUKI GAP FILLER-21851charset[i++] = c++;1852c = 0x11c50; // from BHAIKSUKI DIGIT ZERO1853while (c <= 0x11c6c) // ..to BHAIKSUKI HUNDREDS UNIT MARK1854charset[i++] = c++;1855// 11C70..11CBF; Marchen1856c = 0x11c70; // from MARCHEN HEAD MARK1857while (c <= 0x11c8f) // ..to MARCHEN LETTER A1858charset[i++] = c++;1859c = 0x11c92; // from MARCHEN SUBJOINED LETTER KA1860while (c <= 0x11ca7) // ..to MARCHEN SUBJOINED LETTER ZA1861charset[i++] = c++;1862c = 0x11ca9; // from MARCHEN SUBJOINED LETTER YA1863while (c <= 0x11cb6) // ..to MARCHEN SIGN CANDRABINDU1864charset[i++] = c++;1865// 11D00..11D5F; Masaram Gondi1866c = 0x11d00; // from MASARAM GONDI LETTER A1867while (c <= 0x11d06) // ..to MASARAM GONDI LETTER E1868charset[i++] = c++;1869charset[i++] = 0x11d08; // MASARAM GONDI LETTER AI1870charset[i++] = 0x11d09; // MASARAM GONDI LETTER O1871c = 0x11d0b; // from MASARAM GONDI LETTER 
AU1872while (c <= 0x11d36) // ..to MASARAM GONDI VOWEL SIGN VOCALIC R1873charset[i++] = c++;1874charset[i++] = 0x11d3c; // MASARAM GONDI VOWEL SIGN AI1875charset[i++] = 0x11d3d; // MASARAM GONDI VOWEL SIGN O1876c = 0x11d3f; // from MASARAM GONDI VOWEL SIGN AU1877while (c <= 0x11d47) // ..to MASARAM GONDI RA-KARA1878charset[i++] = c++;1879c = 0x11d50; // from MASARAM GONDI DIGIT ZERO1880while (c <= 0x11d59) // ..to MASARAM GONDI DIGIT NINE1881charset[i++] = c++;1882// 11D60..11DAF; Gunjala Gondi1883c = 0x11d60; // from GUNJALA GONDI LETTER A1884while (c <= 0x11d65) // ..to GUNJALA GONDI LETTER UU1885charset[i++] = c++;1886charset[i++] = 0x11d67; // GUNJALA GONDI LETTER EE1887charset[i++] = 0x11d68; // GUNJALA GONDI LETTER AI1888c = 0x11d6a; // from GUNJALA GONDI LETTER OO1889while (c <= 0x11d8e) // ..to GUNJALA GONDI VOWEL SIGN UU1890charset[i++] = c++;1891charset[i++] = 0x11d90; // GUNJALA GONDI VOWEL SIGN EE1892charset[i++] = 0x11d91; // GUNJALA GONDI VOWEL SIGN AI1893c = 0x11d93; // from GUNJALA GONDI VOWEL SIGN OO1894while (c <= 0x11d98) // ..to GUNJALA GONDI OM1895charset[i++] = c++;1896c = 0x11da0; // from GUNJALA GONDI DIGIT ZERO1897while (c <= 0x11da9) // ..to GUNJALA GONDI DIGIT NINE1898charset[i++] = c++;1899// 11EE0..11EFF; Makasar1900c = 0x11ee0; // from MAKASAR LETTER KA1901while (c <= 0x11ef8) // ..to MAKASAR END OF SECTION1902charset[i++] = c++;1903// 11FB0..11FBF; Lisu Supplement1904charset[i++] = 0x11fb0; // LISU LETTER YHA1905// 11FC0..11FFF; Tamil Supplement1906c = 0x11fc0; // from TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH1907while (c <= 0x11ff1) // ..to TAMIL SIGN VAKAIYARAA1908charset[i++] = c++;1909charset[i++] = 0x11fff; // TAMIL PUNCTUATION END OF TEXT1910// 12000..123FF; Cuneiform1911c = 0x12000; // from CUNEIFORM SIGN A1912while (c <= 0x12399) // ..to CUNEIFORM SIGN U U1913charset[i++] = c++;1914// 12400..1247F; Cuneiform Numbers and Punctuation1915c = 0x12400; // from CUNEIFORM NUMERIC SIGN TWO ASH1916while (c <= 0x1246e) // ..to 
CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM1917charset[i++] = c++;1918c = 0x12470; // from CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER1919while (c <= 0x12474) // ..to CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON1920charset[i++] = c++;1921// 12480..1254F; Early Dynastic Cuneiform1922c = 0x12480; // from CUNEIFORM SIGN AB TIMES NUN TENU1923while (c <= 0x12543) // ..to CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU1924charset[i++] = c++;1925// 13000..1342F; Egyptian Hieroglyphs1926c = 0x13000; // from EGYPTIAN HIEROGLYPH A0011927while (c <= 0x1342e) // ..to EGYPTIAN HIEROGLYPH AA0321928charset[i++] = c++;1929// 13430..1343F; Egyptian Hieroglyph Format Controls1930c = 0x13430; // from EGYPTIAN HIEROGLYPH VERTICAL JOINER1931while (c <= 0x13438) // ..to EGYPTIAN HIEROGLYPH END SEGMENT1932charset[i++] = c++;1933// 14400..1467F; Anatolian Hieroglyphs1934c = 0x14400; // from ANATOLIAN HIEROGLYPH A0011935while (c <= 0x14646) // ..to ANATOLIAN HIEROGLYPH A5301936charset[i++] = c++;1937// 16800..16A3F; Bamum Supplement1938c = 0x16800; // from BAMUM LETTER PHASE-A NGKUE MFON1939while (c <= 0x16a38) // ..to BAMUM LETTER PHASE-F VUEQ1940charset[i++] = c++;1941// 16A40..16A6F; Mro1942c = 0x16a40; // from MRO LETTER TA1943while (c <= 0x16a5e) // ..to MRO LETTER TEK1944charset[i++] = c++;1945c = 0x16a60; // from MRO DIGIT ZERO1946while (c <= 0x16a69) // ..to MRO DIGIT NINE1947charset[i++] = c++;1948charset[i++] = 0x16a6e; // MRO DANDA1949charset[i++] = 0x16a6f; // MRO DOUBLE DANDA1950// 16AD0..16AFF; Bassa Vah1951c = 0x16ad0; // from BASSA VAH LETTER ENNI1952while (c <= 0x16aed) // ..to BASSA VAH LETTER I1953charset[i++] = c++;1954c = 0x16af0; // from BASSA VAH COMBINING HIGH TONE1955while (c <= 0x16af5) // ..to BASSA VAH FULL STOP1956charset[i++] = c++;1957// 16B00..16B8F; Pahawh Hmong1958c = 0x16b00; // from PAHAWH HMONG VOWEL KEEB1959while (c <= 0x16b45) // ..to PAHAWH HMONG SIGN CIM TSOV ROG1960charset[i++] = c++;1961c = 0x16b50; // from PAHAWH HMONG DIGIT ZERO1962while (c <= 
0x16b59) // ..to PAHAWH HMONG DIGIT NINE1963charset[i++] = c++;1964c = 0x16b5b; // from PAHAWH HMONG NUMBER TENS1965while (c <= 0x16b61) // ..to PAHAWH HMONG NUMBER TRILLIONS1966charset[i++] = c++;1967c = 0x16b63; // from PAHAWH HMONG SIGN VOS LUB1968while (c <= 0x16b77) // ..to PAHAWH HMONG SIGN CIM NRES TOS1969charset[i++] = c++;1970c = 0x16b7d; // from PAHAWH HMONG CLAN SIGN TSHEEJ1971while (c <= 0x16b8f) // ..to PAHAWH HMONG CLAN SIGN VWJ1972charset[i++] = c++;1973// 16E40..16E9F; Medefaidrin1974c = 0x16e40; // from MEDEFAIDRIN CAPITAL LETTER M1975while (c <= 0x16e9a) // ..to MEDEFAIDRIN EXCLAMATION OH1976charset[i++] = c++;1977// 16F00..16F9F; Miao1978c = 0x16f00; // from MIAO LETTER PA1979while (c <= 0x16f4a) // ..to MIAO LETTER RTE1980charset[i++] = c++;1981c = 0x16f4f; // from MIAO SIGN CONSONANT MODIFIER BAR1982while (c <= 0x16f87) // ..to MIAO VOWEL SIGN UI1983charset[i++] = c++;1984c = 0x16f8f; // from MIAO TONE RIGHT1985while (c <= 0x16f9f) // ..to MIAO LETTER REFORMED TONE-81986charset[i++] = c++;1987// 16FE0..16FFF; Ideographic Symbols and Punctuation1988c = 0x16fe0; // from TANGUT ITERATION MARK1989while (c <= 0x16fe4) // ..to KHITAN SMALL SCRIPT FILLER1990charset[i++] = c++;1991charset[i++] = 0x16ff0; // VIETNAMESE ALTERNATE READING MARK CA1992charset[i++] = 0x16ff1; // VIETNAMESE ALTERNATE READING MARK NHAY1993// 17000..187FF; Tangut1994c = 0x17000; // from <Tangut Ideograph, First>1995while (c <= 0x187f7) // ..to <Tangut Ideograph, Last>1996charset[i++] = c++;1997// 18800..18AFF; Tangut Components1998c = 0x18800; // from TANGUT COMPONENT-0011999while (c <= 0x18aff) // ..to TANGUT COMPONENT-7682000charset[i++] = c++;2001// 18B00..18CFF; Khitan Small Script2002c = 0x18b00; // from KHITAN SMALL SCRIPT CHARACTER-18B002003while (c <= 0x18cd5) // ..to KHITAN SMALL SCRIPT CHARACTER-18CD52004charset[i++] = c++;2005// 18D00..18D8F; Tangut Supplement2006c = 0x18d00; // from <Tangut Ideograph Supplement, First>2007while (c <= 0x18d08) // ..to <Tangut 
Ideograph Supplement, Last>2008charset[i++] = c++;2009// 1B000..1B0FF; Kana Supplement2010c = 0x1b000; // from KATAKANA LETTER ARCHAIC E2011while (c <= 0x1b0ff) // ..to HENTAIGANA LETTER RE-22012charset[i++] = c++;2013// 1B100..1B12F; Kana Extended-A2014c = 0x1b100; // from HENTAIGANA LETTER RE-32015while (c <= 0x1b11e) // ..to HENTAIGANA LETTER N-MU-MO-22016charset[i++] = c++;2017// 1B130..1B16F; Small Kana Extension2018charset[i++] = 0x1b150; // HIRAGANA LETTER SMALL WI2019charset[i++] = 0x1b152; // HIRAGANA LETTER SMALL WO2020c = 0x1b164; // from KATAKANA LETTER SMALL WI2021while (c <= 0x1b167) // ..to KATAKANA LETTER SMALL N2022charset[i++] = c++;2023// 1B170..1B2FF; Nushu2024c = 0x1b170; // from NUSHU CHARACTER-1B1702025while (c <= 0x1b2fb) // ..to NUSHU CHARACTER-1B2FB2026charset[i++] = c++;2027// 1BC00..1BC9F; Duployan2028c = 0x1bc00; // from DUPLOYAN LETTER H2029while (c <= 0x1bc6a) // ..to DUPLOYAN LETTER VOCALIC M2030charset[i++] = c++;2031c = 0x1bc70; // from DUPLOYAN AFFIX LEFT HORIZONTAL SECANT2032while (c <= 0x1bc7c) // ..to DUPLOYAN AFFIX ATTACHED TANGENT HOOK2033charset[i++] = c++;2034c = 0x1bc80; // from DUPLOYAN AFFIX HIGH ACUTE2035while (c <= 0x1bc88) // ..to DUPLOYAN AFFIX HIGH VERTICAL2036charset[i++] = c++;2037c = 0x1bc90; // from DUPLOYAN AFFIX LOW ACUTE2038while (c <= 0x1bc99) // ..to DUPLOYAN AFFIX LOW ARROW2039charset[i++] = c++;2040c = 0x1bc9c; // from DUPLOYAN SIGN O WITH CROSS2041while (c <= 0x1bc9f) // ..to DUPLOYAN PUNCTUATION CHINOOK FULL STOP2042charset[i++] = c++;2043// 1BCA0..1BCAF; Shorthand Format Controls2044c = 0x1bca0; // from SHORTHAND FORMAT LETTER OVERLAP2045while (c <= 0x1bca3) // ..to SHORTHAND FORMAT UP STEP2046charset[i++] = c++;2047// 1D000..1D0FF; Byzantine Musical Symbols2048c = 0x1d000; // from BYZANTINE MUSICAL SYMBOL PSILI2049while (c <= 0x1d0f5) // ..to BYZANTINE MUSICAL SYMBOL GORGON NEO KATO2050charset[i++] = c++;2051// 1D100..1D1FF; Musical Symbols2052c = 0x1d100; // from MUSICAL SYMBOL SINGLE 
BARLINE2053while (c <= 0x1d126) // ..to MUSICAL SYMBOL DRUM CLEF-22054charset[i++] = c++;2055c = 0x1d129; // from MUSICAL SYMBOL MULTIPLE MEASURE REST2056while (c <= 0x1d1e8) // ..to MUSICAL SYMBOL KIEVAN FLAT SIGN2057charset[i++] = c++;2058// 1D200..1D24F; Ancient Greek Musical Notation2059c = 0x1d200; // from GREEK VOCAL NOTATION SYMBOL-12060while (c <= 0x1d245) // ..to GREEK MUSICAL LEIMMA2061charset[i++] = c++;2062// 1D2E0..1D2FF; Mayan Numerals2063c = 0x1d2e0; // from MAYAN NUMERAL ZERO2064while (c <= 0x1d2f3) // ..to MAYAN NUMERAL NINETEEN2065charset[i++] = c++;2066// 1D300..1D35F; Tai Xuan Jing Symbols2067c = 0x1d300; // from MONOGRAM FOR EARTH2068while (c <= 0x1d356) // ..to TETRAGRAM FOR FOSTERING2069charset[i++] = c++;2070// 1D360..1D37F; Counting Rod Numerals2071c = 0x1d360; // from COUNTING ROD UNIT DIGIT ONE2072while (c <= 0x1d378) // ..to TALLY MARK FIVE2073charset[i++] = c++;2074// 1D400..1D7FF; Mathematical Alphanumeric Symbols2075c = 0x1d400; // from MATHEMATICAL BOLD CAPITAL A2076while (c <= 0x1d454) // ..to MATHEMATICAL ITALIC SMALL G2077charset[i++] = c++;2078c = 0x1d456; // from MATHEMATICAL ITALIC SMALL I2079while (c <= 0x1d49c) // ..to MATHEMATICAL SCRIPT CAPITAL A2080charset[i++] = c++;2081charset[i++] = 0x1d49e; // MATHEMATICAL SCRIPT CAPITAL C2082charset[i++] = 0x1d49f; // MATHEMATICAL SCRIPT CAPITAL D2083charset[i++] = 0x1d4a5; // MATHEMATICAL SCRIPT CAPITAL J2084charset[i++] = 0x1d4a6; // MATHEMATICAL SCRIPT CAPITAL K2085c = 0x1d4a9; // from MATHEMATICAL SCRIPT CAPITAL N2086while (c <= 0x1d4ac) // ..to MATHEMATICAL SCRIPT CAPITAL Q2087charset[i++] = c++;2088c = 0x1d4ae; // from MATHEMATICAL SCRIPT CAPITAL S2089while (c <= 0x1d4b9) // ..to MATHEMATICAL SCRIPT SMALL D2090charset[i++] = c++;2091c = 0x1d4bd; // from MATHEMATICAL SCRIPT SMALL H2092while (c <= 0x1d4c3) // ..to MATHEMATICAL SCRIPT SMALL N2093charset[i++] = c++;2094c = 0x1d4c5; // from MATHEMATICAL SCRIPT SMALL P2095while (c <= 0x1d505) // ..to MATHEMATICAL FRAKTUR CAPITAL 
B2096charset[i++] = c++;2097c = 0x1d507; // from MATHEMATICAL FRAKTUR CAPITAL D2098while (c <= 0x1d50a) // ..to MATHEMATICAL FRAKTUR CAPITAL G2099charset[i++] = c++;2100c = 0x1d50d; // from MATHEMATICAL FRAKTUR CAPITAL J2101while (c <= 0x1d514) // ..to MATHEMATICAL FRAKTUR CAPITAL Q2102charset[i++] = c++;2103c = 0x1d516; // from MATHEMATICAL FRAKTUR CAPITAL S2104while (c <= 0x1d51c) // ..to MATHEMATICAL FRAKTUR CAPITAL Y2105charset[i++] = c++;2106c = 0x1d51e; // from MATHEMATICAL FRAKTUR SMALL A2107while (c <= 0x1d539) // ..to MATHEMATICAL DOUBLE-STRUCK CAPITAL B2108charset[i++] = c++;2109c = 0x1d53b; // from MATHEMATICAL DOUBLE-STRUCK CAPITAL D2110while (c <= 0x1d53e) // ..to MATHEMATICAL DOUBLE-STRUCK CAPITAL G2111charset[i++] = c++;2112c = 0x1d540; // from MATHEMATICAL DOUBLE-STRUCK CAPITAL I2113while (c <= 0x1d544) // ..to MATHEMATICAL DOUBLE-STRUCK CAPITAL M2114charset[i++] = c++;2115c = 0x1d54a; // from MATHEMATICAL DOUBLE-STRUCK CAPITAL S2116while (c <= 0x1d550) // ..to MATHEMATICAL DOUBLE-STRUCK CAPITAL Y2117charset[i++] = c++;2118c = 0x1d552; // from MATHEMATICAL DOUBLE-STRUCK SMALL A2119while (c <= 0x1d6a5) // ..to MATHEMATICAL ITALIC SMALL DOTLESS J2120charset[i++] = c++;2121c = 0x1d6a8; // from MATHEMATICAL BOLD CAPITAL ALPHA2122while (c <= 0x1d7cb) // ..to MATHEMATICAL BOLD SMALL DIGAMMA2123charset[i++] = c++;2124c = 0x1d7ce; // from MATHEMATICAL BOLD DIGIT ZERO2125while (c <= 0x1d7ff) // ..to MATHEMATICAL MONOSPACE DIGIT NINE2126charset[i++] = c++;2127// 1D800..1DAAF; Sutton SignWriting2128c = 0x1d800; // from SIGNWRITING HAND-FIST INDEX2129while (c <= 0x1da8b) // ..to SIGNWRITING PARENTHESIS2130charset[i++] = c++;2131c = 0x1da9b; // from SIGNWRITING FILL MODIFIER-22132while (c <= 0x1da9f) // ..to SIGNWRITING FILL MODIFIER-62133charset[i++] = c++;2134c = 0x1daa1; // from SIGNWRITING ROTATION MODIFIER-22135while (c <= 0x1daaf) // ..to SIGNWRITING ROTATION MODIFIER-162136charset[i++] = c++;2137// 1E000..1E02F; Glagolitic Supplement2138c = 0x1e000; // 
from COMBINING GLAGOLITIC LETTER AZU2139while (c <= 0x1e006) // ..to COMBINING GLAGOLITIC LETTER ZHIVETE2140charset[i++] = c++;2141c = 0x1e008; // from COMBINING GLAGOLITIC LETTER ZEMLJA2142while (c <= 0x1e018) // ..to COMBINING GLAGOLITIC LETTER HERU2143charset[i++] = c++;2144c = 0x1e01b; // from COMBINING GLAGOLITIC LETTER SHTA2145while (c <= 0x1e021) // ..to COMBINING GLAGOLITIC LETTER YATI2146charset[i++] = c++;2147charset[i++] = 0x1e023; // COMBINING GLAGOLITIC LETTER YU2148charset[i++] = 0x1e024; // COMBINING GLAGOLITIC LETTER SMALL YUS2149c = 0x1e026; // from COMBINING GLAGOLITIC LETTER YO2150while (c <= 0x1e02a) // ..to COMBINING GLAGOLITIC LETTER FITA2151charset[i++] = c++;2152// 1E100..1E14F; Nyiakeng Puachue Hmong2153c = 0x1e100; // from NYIAKENG PUACHUE HMONG LETTER MA2154while (c <= 0x1e12c) // ..to NYIAKENG PUACHUE HMONG LETTER W2155charset[i++] = c++;2156c = 0x1e130; // from NYIAKENG PUACHUE HMONG TONE-B2157while (c <= 0x1e13d) // ..to NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER2158charset[i++] = c++;2159c = 0x1e140; // from NYIAKENG PUACHUE HMONG DIGIT ZERO2160while (c <= 0x1e149) // ..to NYIAKENG PUACHUE HMONG DIGIT NINE2161charset[i++] = c++;2162charset[i++] = 0x1e14e; // NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ2163charset[i++] = 0x1e14f; // NYIAKENG PUACHUE HMONG CIRCLED CA2164// 1E2C0..1E2FF; Wancho2165c = 0x1e2c0; // from WANCHO LETTER AA2166while (c <= 0x1e2f9) // ..to WANCHO DIGIT NINE2167charset[i++] = c++;2168charset[i++] = 0x1e2ff; // WANCHO NGUN SIGN2169// 1E800..1E8DF; Mende Kikakui2170c = 0x1e800; // from MENDE KIKAKUI SYLLABLE M001 KI2171while (c <= 0x1e8c4) // ..to MENDE KIKAKUI SYLLABLE M060 NYON2172charset[i++] = c++;2173c = 0x1e8c7; // from MENDE KIKAKUI DIGIT ONE2174while (c <= 0x1e8d6) // ..to MENDE KIKAKUI COMBINING NUMBER MILLIONS2175charset[i++] = c++;2176// 1E900..1E95F; Adlam2177c = 0x1e900; // from ADLAM CAPITAL LETTER ALIF2178while (c <= 0x1e94b) // ..to ADLAM NASALIZATION MARK2179charset[i++] = c++;2180c = 0x1e950; // from 
ADLAM DIGIT ZERO2181while (c <= 0x1e959) // ..to ADLAM DIGIT NINE2182charset[i++] = c++;2183charset[i++] = 0x1e95e; // ADLAM INITIAL EXCLAMATION MARK2184charset[i++] = 0x1e95f; // ADLAM INITIAL QUESTION MARK2185// 1EC70..1ECBF; Indic Siyaq Numbers2186c = 0x1ec71; // from INDIC SIYAQ NUMBER ONE2187while (c <= 0x1ecb4) // ..to INDIC SIYAQ ALTERNATE LAKH MARK2188charset[i++] = c++;2189// 1ED00..1ED4F; Ottoman Siyaq Numbers2190c = 0x1ed01; // from OTTOMAN SIYAQ NUMBER ONE2191while (c <= 0x1ed3d) // ..to OTTOMAN SIYAQ FRACTION ONE SIXTH2192charset[i++] = c++;2193// 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols2194c = 0x1ee00; // from ARABIC MATHEMATICAL ALEF2195while (c <= 0x1ee03) // ..to ARABIC MATHEMATICAL DAL2196charset[i++] = c++;2197c = 0x1ee05; // from ARABIC MATHEMATICAL WAW2198while (c <= 0x1ee1f) // ..to ARABIC MATHEMATICAL DOTLESS QAF2199charset[i++] = c++;2200charset[i++] = 0x1ee21; // ARABIC MATHEMATICAL INITIAL BEH2201charset[i++] = 0x1ee22; // ARABIC MATHEMATICAL INITIAL JEEM2202c = 0x1ee29; // from ARABIC MATHEMATICAL INITIAL YEH2203while (c <= 0x1ee32) // ..to ARABIC MATHEMATICAL INITIAL QAF2204charset[i++] = c++;2205c = 0x1ee34; // from ARABIC MATHEMATICAL INITIAL SHEEN2206while (c <= 0x1ee37) // ..to ARABIC MATHEMATICAL INITIAL KHAH2207charset[i++] = c++;2208charset[i++] = 0x1ee4d; // ARABIC MATHEMATICAL TAILED NOON2209charset[i++] = 0x1ee4f; // ARABIC MATHEMATICAL TAILED AIN2210charset[i++] = 0x1ee51; // ARABIC MATHEMATICAL TAILED SAD2211charset[i++] = 0x1ee52; // ARABIC MATHEMATICAL TAILED QAF2212charset[i++] = 0x1ee61; // ARABIC MATHEMATICAL STRETCHED BEH2213charset[i++] = 0x1ee62; // ARABIC MATHEMATICAL STRETCHED JEEM2214c = 0x1ee67; // from ARABIC MATHEMATICAL STRETCHED HAH2215while (c <= 0x1ee6a) // ..to ARABIC MATHEMATICAL STRETCHED KAF2216charset[i++] = c++;2217c = 0x1ee6c; // from ARABIC MATHEMATICAL STRETCHED MEEM2218while (c <= 0x1ee72) // ..to ARABIC MATHEMATICAL STRETCHED QAF2219charset[i++] = c++;2220c = 0x1ee74; // from ARABIC 
MATHEMATICAL STRETCHED SHEEN2221while (c <= 0x1ee77) // ..to ARABIC MATHEMATICAL STRETCHED KHAH2222charset[i++] = c++;2223c = 0x1ee79; // from ARABIC MATHEMATICAL STRETCHED DAD2224while (c <= 0x1ee7c) // ..to ARABIC MATHEMATICAL STRETCHED DOTLESS BEH2225charset[i++] = c++;2226c = 0x1ee80; // from ARABIC MATHEMATICAL LOOPED ALEF2227while (c <= 0x1ee89) // ..to ARABIC MATHEMATICAL LOOPED YEH2228charset[i++] = c++;2229c = 0x1ee8b; // from ARABIC MATHEMATICAL LOOPED LAM2230while (c <= 0x1ee9b) // ..to ARABIC MATHEMATICAL LOOPED GHAIN2231charset[i++] = c++;2232charset[i++] = 0x1eea1; // ARABIC MATHEMATICAL DOUBLE-STRUCK BEH2233charset[i++] = 0x1eea3; // ARABIC MATHEMATICAL DOUBLE-STRUCK DAL2234c = 0x1eea5; // from ARABIC MATHEMATICAL DOUBLE-STRUCK WAW2235while (c <= 0x1eea9) // ..to ARABIC MATHEMATICAL DOUBLE-STRUCK YEH2236charset[i++] = c++;2237c = 0x1eeab; // from ARABIC MATHEMATICAL DOUBLE-STRUCK LAM2238while (c <= 0x1eebb) // ..to ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN2239charset[i++] = c++;2240charset[i++] = 0x1eef0; // ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL2241charset[i++] = 0x1eef1; // ARABIC MATHEMATICAL OPERATOR HAH WITH DAL2242// 1F000..1F02F; Mahjong Tiles2243c = 0x1f000; // from MAHJONG TILE EAST WIND2244while (c <= 0x1f02b) // ..to MAHJONG TILE BACK2245charset[i++] = c++;2246// 1F030..1F09F; Domino Tiles2247c = 0x1f030; // from DOMINO TILE HORIZONTAL BACK2248while (c <= 0x1f093) // ..to DOMINO TILE VERTICAL-06-062249charset[i++] = c++;2250// 1F0A0..1F0FF; Playing Cards2251c = 0x1f0a0; // from PLAYING CARD BACK2252while (c <= 0x1f0ae) // ..to PLAYING CARD KING OF SPADES2253charset[i++] = c++;2254c = 0x1f0b1; // from PLAYING CARD ACE OF HEARTS2255while (c <= 0x1f0bf) // ..to PLAYING CARD RED JOKER2256charset[i++] = c++;2257c = 0x1f0c1; // from PLAYING CARD ACE OF DIAMONDS2258while (c <= 0x1f0cf) // ..to PLAYING CARD BLACK JOKER2259charset[i++] = c++;2260c = 0x1f0d1; // from PLAYING CARD ACE OF CLUBS2261while (c <= 0x1f0f5) // ..to PLAYING 
CARD TRUMP-212262charset[i++] = c++;2263// 1F100..1F1FF; Enclosed Alphanumeric Supplement2264c = 0x1f100; // from DIGIT ZERO FULL STOP2265while (c <= 0x1f1ad) // ..to MASK WORK SYMBOL2266charset[i++] = c++;2267c = 0x1f1e6; // from REGIONAL INDICATOR SYMBOL LETTER A2268while (c <= 0x1f1ff) // ..to REGIONAL INDICATOR SYMBOL LETTER Z2269charset[i++] = c++;2270// 1F200..1F2FF; Enclosed Ideographic Supplement2271charset[i++] = 0x1f200; // SQUARE HIRAGANA HOKA2272charset[i++] = 0x1f202; // SQUARED KATAKANA SA2273c = 0x1f210; // from SQUARED CJK UNIFIED IDEOGRAPH-624B2274while (c <= 0x1f23b) // ..to SQUARED CJK UNIFIED IDEOGRAPH-914D2275charset[i++] = c++;2276c = 0x1f240; // from TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C2277while (c <= 0x1f248) // ..to TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-65572278charset[i++] = c++;2279charset[i++] = 0x1f250; // CIRCLED IDEOGRAPH ADVANTAGE2280charset[i++] = 0x1f251; // CIRCLED IDEOGRAPH ACCEPT2281c = 0x1f260; // from ROUNDED SYMBOL FOR FU2282while (c <= 0x1f265) // ..to ROUNDED SYMBOL FOR CAI2283charset[i++] = c++;2284// 1F300..1F5FF; Miscellaneous Symbols and Pictographs2285c = 0x1f300; // from CYCLONE2286while (c <= 0x1f5ff) // ..to MOYAI2287charset[i++] = c++;2288// 1F600..1F64F; Emoticons2289c = 0x1f600; // from GRINNING FACE2290while (c <= 0x1f64f) // ..to PERSON WITH FOLDED HANDS2291charset[i++] = c++;2292// 1F650..1F67F; Ornamental Dingbats2293c = 0x1f650; // from NORTH WEST POINTING LEAF2294while (c <= 0x1f67f) // ..to REVERSE CHECKER BOARD2295charset[i++] = c++;2296// 1F680..1F6FF; Transport and Map Symbols2297c = 0x1f680; // from ROCKET2298while (c <= 0x1f6d7) // ..to ELEVATOR2299charset[i++] = c++;2300c = 0x1f6e0; // from HAMMER AND WRENCH2301while (c <= 0x1f6ec) // ..to AIRPLANE ARRIVING2302charset[i++] = c++;2303c = 0x1f6f0; // from SATELLITE2304while (c <= 0x1f6fc) // ..to ROLLER SKATE2305charset[i++] = c++;2306// 1F700..1F77F; Alchemical Symbols2307c = 0x1f700; // from ALCHEMICAL SYMBOL FOR 
QUINTESSENCE2308while (c <= 0x1f773) // ..to ALCHEMICAL SYMBOL FOR HALF OUNCE2309charset[i++] = c++;2310// 1F780..1F7FF; Geometric Shapes Extended2311c = 0x1f780; // from BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE2312while (c <= 0x1f7d8) // ..to NEGATIVE CIRCLED SQUARE2313charset[i++] = c++;2314c = 0x1f7e0; // from LARGE ORANGE CIRCLE2315while (c <= 0x1f7eb) // ..to LARGE BROWN SQUARE2316charset[i++] = c++;2317// 1F800..1F8FF; Supplemental Arrows-C2318c = 0x1f800; // from LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD2319while (c <= 0x1f80b) // ..to DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD2320charset[i++] = c++;2321c = 0x1f810; // from LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD2322while (c <= 0x1f847) // ..to DOWNWARDS HEAVY ARROW2323charset[i++] = c++;2324c = 0x1f850; // from LEFTWARDS SANS-SERIF ARROW2325while (c <= 0x1f859) // ..to UP DOWN SANS-SERIF ARROW2326charset[i++] = c++;2327c = 0x1f860; // from WIDE-HEADED LEFTWARDS LIGHT BARB ARROW2328while (c <= 0x1f887) // ..to WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW2329charset[i++] = c++;2330c = 0x1f890; // from LEFTWARDS TRIANGLE ARROWHEAD2331while (c <= 0x1f8ad) // ..to WHITE ARROW SHAFT WIDTH TWO THIRDS2332charset[i++] = c++;2333charset[i++] = 0x1f8b0; // ARROW POINTING UPWARDS THEN NORTH WEST2334charset[i++] = 0x1f8b1; // ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST2335// 1F900..1F9FF; Supplemental Symbols and Pictographs2336c = 0x1f900; // from CIRCLED CROSS FORMEE WITH FOUR DOTS2337while (c <= 0x1f978) // ..to DISGUISED FACE2338charset[i++] = c++;2339c = 0x1f97a; // from FACE WITH PLEADING EYES2340while (c <= 0x1f9cb) // ..to BUBBLE TEA2341charset[i++] = c++;2342c = 0x1f9cd; // from STANDING PERSON2343while (c <= 0x1f9ff) // ..to NAZAR AMULET2344charset[i++] = c++;2345// 1FA00..1FA6F; Chess Symbols2346c = 0x1fa00; // from NEUTRAL CHESS KING2347while (c <= 0x1fa53) // ..to BLACK CHESS KNIGHT-BISHOP2348charset[i++] = c++;2349c = 0x1fa60; // from XIANGQI RED GENERAL2350while (c <= 0x1fa6d) // 
..to XIANGQI BLACK SOLDIER2351charset[i++] = c++;2352// 1FA70..1FAFF; Symbols and Pictographs Extended-A2353c = 0x1fa70; // from BALLET SHOES2354while (c <= 0x1fa74) // ..to THONG SANDAL2355charset[i++] = c++;2356charset[i++] = 0x1fa78; // DROP OF BLOOD2357charset[i++] = 0x1fa7a; // STETHOSCOPE2358c = 0x1fa80; // from YO-YO2359while (c <= 0x1fa86) // ..to NESTING DOLLS2360charset[i++] = c++;2361c = 0x1fa90; // from RINGED PLANET2362while (c <= 0x1faa8) // ..to ROCK2363charset[i++] = c++;2364c = 0x1fab0; // from FLY2365while (c <= 0x1fab6) // ..to FEATHER2366charset[i++] = c++;2367charset[i++] = 0x1fac0; // ANATOMICAL HEART2368charset[i++] = 0x1fac2; // PEOPLE HUGGING2369c = 0x1fad0; // from BLUEBERRIES2370while (c <= 0x1fad6) // ..to TEAPOT2371charset[i++] = c++;2372// 1FB00..1FBFF; Symbols for Legacy Computing2373c = 0x1fb00; // from BLOCK SEXTANT-12374while (c <= 0x1fb92) // ..to UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK2375charset[i++] = c++;2376c = 0x1fb94; // from LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK2377while (c <= 0x1fbca) // ..to WHITE UP-POINTING CHEVRON2378charset[i++] = c++;2379c = 0x1fbf0; // from SEGMENTED DIGIT ZERO2380while (c <= 0x1fbf9) // ..to SEGMENTED DIGIT NINE2381charset[i++] = c++;2382// 20000..2A6DF; CJK Unified Ideographs Extension B2383c = 0x20000; // from <CJK Ideograph Extension B, First>2384while (c <= 0x2a6dd) // ..to <CJK Ideograph Extension B, Last>2385charset[i++] = c++;2386// 2A700..2B73F; CJK Unified Ideographs Extension C2387c = 0x2a700; // from <CJK Ideograph Extension C, First>2388while (c <= 0x2b734) // ..to <CJK Ideograph Extension C, Last>2389charset[i++] = c++;2390// 2B740..2B81F; CJK Unified Ideographs Extension D2391c = 0x2b740; // from <CJK Ideograph Extension D, First>2392while (c <= 0x2b81d) // ..to <CJK Ideograph Extension D, Last>2393charset[i++] = c++;2394// 2B820..2CEAF; CJK Unified Ideographs Extension E2395c = 0x2b820; // from <CJK Ideograph Extension E, First>2396while (c <= 0x2cea1) // 
..to <CJK Ideograph Extension E, Last>2397charset[i++] = c++;2398// 2CEB0..2EBEF; CJK Unified Ideographs Extension F2399c = 0x2ceb0; // from <CJK Ideograph Extension F, First>2400while (c <= 0x2ebe0) // ..to <CJK Ideograph Extension F, Last>2401charset[i++] = c++;2402// 2F800..2FA1F; CJK Compatibility Ideographs Supplement2403c = 0x2f800; // from CJK COMPATIBILITY IDEOGRAPH-2F8002404while (c <= 0x2fa1d) // ..to CJK COMPATIBILITY IDEOGRAPH-2FA1D2405charset[i++] = c++;2406// 30000..3134F; CJK Unified Ideographs Extension G2407c = 0x30000; // from <CJK Ideograph Extension G, First>2408while (c <= 0x3134a) // ..to <CJK Ideograph Extension G, Last>2409charset[i++] = c++;2410// E0000..E007F; Tags2411c = 0xe0020; // from TAG SPACE2412while (c <= 0xe007f) // ..to CANCEL TAG2413charset[i++] = c++;2414// E0100..E01EF; Variation Selectors Supplement2415// F0000..FFFFF; Supplementary Private Use Area-A2416// 100000..10FFFF; Supplementary Private Use Area-B24172418/* Zero-terminate it, and cache the first character */2419charset[i] = 0;2420c0 = charset[0];24212422last = minlength - 1;2423i = 0;2424while (i <= last) {2425id[i] = 0;2426word[i++] = c0;2427}2428lastid = -1;2429word[i] = 0;24302431/* We must init word with dummy data, it doesn't get set until filter() */2432word = 1;2433}24342435void generate()2436{2437int i;24382439/* Handle the typical case specially */2440if (word[last] = charset[++lastid]) return;24412442lastid = 0;2443word[i = last] = c0;2444while (i--) { // Have a preceding position?2445if (word[i] = charset[++id[i]]) return;2446id[i] = 0;2447word[i] = c0;2448}24492450if (++last < maxlength) { // Next length?2451id[last] = lastid = 0;2452word[last] = c0;2453word[last + 1] = 0;2454} else // We're done2455word = 0;2456}24572458void restore()2459{2460int i, c;24612462/* Calculate the current length and infer the character indices */2463last = 0;2464while (c = word[last]) {2465i = 0; while (charset[i] != c && charset[i]) i++;2466if (!charset[i]) i = 0; // Not 
found2467id[last++] = i;2468}2469lastid = id[--last];2470}247124722473