Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.
Path: blob/master/data/jtr/dumb16.conf
Views: 11765
# This software is Copyright (c) 2012-2020 magnum, and it is hereby1# released to the general public under the following terms:2# Redistribution and use in source and binary forms, with or without3# modification, are permitted.4#5# Generic implementation of "dumb" exhaustive search of Unicode BMP.6# Default is to try *all* allocated characters in the BMP of Unicode v137# (there's 55,387 of them). Even if a fast format can exhaust two characters8# in 15 minutes, three characters would take 1.5 years...9#10# Note that these modes will handle --max-len differently than normal: They11# will consider number of characters as opposed to number of bytes. This12# means you can naturally just use e.g. --max-len=3 for generating all13# three-character candidates (which may be up to 9 bytes each).14#15# Note that the (newer) cracking mode --subsets=full-unicode is way faster than16# this external mode, although not as easy to adapt to smaller portions of the17# Unicode space. See doc/SUBSETS1819[List.External:Dumb16]20int maxlength; // Maximum password length to try21int last; // Last character position, zero-based22int lastid; // Character index in the last position23int id[0x7f]; // Current character indices for other positions24int charset[0x10000], c0; // Characters2526void init()27{28int minlength;29int i, c;3031# Trigger UTF-32 handling in External mode32utf32 = 1;3334if (req_minlen)35minlength = req_minlen;36else37minlength = 1;38if (req_maxlen)39maxlength = req_maxlen;40else41maxlength = 2;4243/*44* This defines the character set. This is auto-generated from UnicodeData.txt45* and we skip control characters.46*/47i = 0;48// 0000..007F; Basic Latin49c = 0x20; // from SPACE50while (c <= 0x7e) // ..to TILDE51charset[i++] = c++;52// 0080..00FF; Latin-1 Supplement53c = 0xa0; // from NO-BREAK SPACE54while (c <= 0xff) // ..to LATIN SMALL LETTER Y WITH DIAERESIS55charset[i++] = c++;56// 0100..017F; Latin Extended-A57c = 0x100; // from LATIN CAPITAL LETTER A WITH MACRON58while (c <= 0x17f) // ..to LATIN SMALL LETTER LONG S59charset[i++] = c++;60// 0180..024F; Latin Extended-B61c = 0x180; // from LATIN SMALL LETTER B WITH STROKE62while (c <= 0x24f) // ..to LATIN SMALL LETTER Y WITH STROKE63charset[i++] = c++;64// 0250..02AF; IPA Extensions65c = 0x250; // from LATIN SMALL LETTER TURNED A66while (c <= 0x2af) // ..to LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL67charset[i++] = c++;68// 02B0..02FF; Spacing Modifier Letters69c = 0x2b0; // from MODIFIER LETTER SMALL H70while (c <= 0x2ff) // ..to MODIFIER LETTER LOW LEFT ARROW71charset[i++] = c++;72// 0300..036F; Combining Diacritical Marks73c = 0x300; // from COMBINING GRAVE ACCENT74while (c <= 0x36f) // ..to COMBINING LATIN SMALL LETTER X75charset[i++] = c++;76// 0370..03FF; Greek and Coptic77c = 0x370; // from GREEK CAPITAL LETTER HETA78while (c <= 0x377) // ..to GREEK SMALL LETTER PAMPHYLIAN DIGAMMA79charset[i++] = c++;80c = 0x37a; // from GREEK YPOGEGRAMMENI81while (c <= 0x37f) // ..to GREEK CAPITAL LETTER YOT82charset[i++] = c++;83c = 0x384; // from GREEK TONOS84while (c <= 0x38a) // ..to GREEK CAPITAL LETTER IOTA WITH TONOS85charset[i++] = c++;86c = 0x38e; // from GREEK CAPITAL LETTER UPSILON WITH TONOS87while (c <= 0x3a1) // ..to GREEK CAPITAL LETTER RHO88charset[i++] = c++;89c = 0x3a3; // from GREEK CAPITAL LETTER SIGMA90while (c <= 0x3ff) // ..to GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL91charset[i++] = c++;92// 0400..04FF; Cyrillic93c = 0x400; // from CYRILLIC CAPITAL LETTER IE WITH GRAVE94while (c <= 0x4ff) // ..to CYRILLIC SMALL LETTER HA WITH STROKE95charset[i++] = c++;96// 0500..052F; Cyrillic Supplement97c = 0x500; // from CYRILLIC CAPITAL LETTER KOMI DE98while (c <= 0x52f) // ..to CYRILLIC SMALL LETTER EL WITH DESCENDER99charset[i++] = c++;100// 0530..058F; Armenian101c = 0x531; // from ARMENIAN CAPITAL LETTER AYB102while (c <= 0x556) // ..to ARMENIAN CAPITAL LETTER FEH103charset[i++] = c++;104c = 0x559; // from ARMENIAN MODIFIER LETTER LEFT HALF RING105while (c <= 0x58a) // ..to ARMENIAN HYPHEN106charset[i++] = c++;107charset[i++] = 0x58d; // RIGHT-FACING ARMENIAN ETERNITY SIGN108charset[i++] = 0x58f; // ARMENIAN DRAM SIGN109// 0590..05FF; Hebrew110c = 0x591; // from HEBREW ACCENT ETNAHTA111while (c <= 0x5c7) // ..to HEBREW POINT QAMATS QATAN112charset[i++] = c++;113c = 0x5d0; // from HEBREW LETTER ALEF114while (c <= 0x5ea) // ..to HEBREW LETTER TAV115charset[i++] = c++;116c = 0x5ef; // from HEBREW YOD TRIANGLE117while (c <= 0x5f4) // ..to HEBREW PUNCTUATION GERSHAYIM118charset[i++] = c++;119// 0600..06FF; Arabic120c = 0x600; // from ARABIC NUMBER SIGN121while (c <= 0x61c) // ..to ARABIC LETTER MARK122charset[i++] = c++;123c = 0x61e; // from ARABIC TRIPLE DOT PUNCTUATION MARK124while (c <= 0x6ff) // ..to ARABIC LETTER HEH WITH INVERTED V125charset[i++] = c++;126// 0700..074F; Syriac127c = 0x700; // from SYRIAC END OF PARAGRAPH128while (c <= 0x70d) // ..to SYRIAC HARKLEAN ASTERISCUS129charset[i++] = c++;130c = 0x70f; // from SYRIAC ABBREVIATION MARK131while (c <= 0x74a) // ..to SYRIAC BARREKH132charset[i++] = c++;133charset[i++] = 0x74d; // SYRIAC LETTER SOGDIAN ZHAIN134charset[i++] = 0x74f; // SYRIAC LETTER SOGDIAN FE135// 0750..077F; Arabic Supplement136c = 0x750; // from ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW137while (c <= 0x77f) // ..to ARABIC LETTER KAF WITH TWO DOTS ABOVE138charset[i++] = c++;139// 0780..07BF; Thaana140c = 0x780; // from THAANA LETTER HAA141while (c <= 0x7b1) // ..to THAANA LETTER NAA142charset[i++] = c++;143// 07C0..07FF; NKo144c = 0x7c0; // from NKO DIGIT ZERO145while (c <= 0x7fa) // ..to NKO LAJANYALAN146charset[i++] = c++;147charset[i++] = 0x7fd; // NKO DANTAYALAN148charset[i++] = 0x7ff; // NKO TAMAN SIGN149// 0800..083F; Samaritan150c = 0x800; // from SAMARITAN LETTER ALAF151while (c <= 0x82d) // ..to SAMARITAN MARK NEQUDAA152charset[i++] = c++;153c = 0x830; // from SAMARITAN PUNCTUATION NEQUDAA154while (c <= 0x83e) // ..to SAMARITAN PUNCTUATION ANNAAU155charset[i++] = c++;156// 0840..085F; Mandaic157c = 0x840; // from MANDAIC LETTER HALQA158while (c <= 0x85b) // ..to MANDAIC GEMINATION MARK159charset[i++] = c++;160charset[i++] = 0x85e; // MANDAIC PUNCTUATION161// 0860..086F; Syriac Supplement162c = 0x860; // from SYRIAC LETTER MALAYALAM NGA163while (c <= 0x86a) // ..to SYRIAC LETTER MALAYALAM SSA164charset[i++] = c++;165// 08A0..08FF; Arabic Extended-A166c = 0x8a0; // from ARABIC LETTER BEH WITH SMALL V BELOW167while (c <= 0x8b4) // ..to ARABIC LETTER KAF WITH DOT BELOW168charset[i++] = c++;169c = 0x8b6; // from ARABIC LETTER BEH WITH SMALL MEEM ABOVE170while (c <= 0x8c7) // ..to ARABIC LETTER LAM WITH SMALL ARABIC LETTER TAH ABOVE171charset[i++] = c++;172c = 0x8d3; // from ARABIC SMALL LOW WAW173while (c <= 0x8ff) // ..to ARABIC MARK SIDEWAYS NOON GHUNNA174charset[i++] = c++;175// 0900..097F; Devanagari176c = 0x900; // from DEVANAGARI SIGN INVERTED CANDRABINDU177while (c <= 0x97f) // ..to DEVANAGARI LETTER BBA178charset[i++] = c++;179// 0980..09FF; Bengali180c = 0x980; // from BENGALI ANJI181while (c <= 0x983) // ..to BENGALI SIGN VISARGA182charset[i++] = c++;183c = 0x985; // from BENGALI LETTER A184while (c <= 0x98c) // ..to BENGALI LETTER VOCALIC L185charset[i++] = c++;186charset[i++] = 0x98f; // BENGALI LETTER E187charset[i++] = 0x990; // BENGALI LETTER AI188c = 0x993; // from BENGALI LETTER O189while (c <= 0x9a8) // ..to BENGALI LETTER NA190charset[i++] = c++;191c = 0x9aa; // from BENGALI LETTER PA192while (c <= 0x9b0) // ..to BENGALI LETTER RA193charset[i++] = c++;194c = 0x9b6; // from BENGALI LETTER SHA195while (c <= 0x9b9) // ..to BENGALI LETTER HA196charset[i++] = c++;197c = 0x9bc; // from BENGALI SIGN NUKTA198while (c <= 0x9c4) // ..to BENGALI VOWEL SIGN VOCALIC RR199charset[i++] = c++;200charset[i++] = 0x9c7; // BENGALI VOWEL SIGN E201charset[i++] = 0x9c8; // BENGALI VOWEL SIGN AI202c = 0x9cb; // from BENGALI VOWEL SIGN O203while (c <= 0x9ce) // ..to BENGALI LETTER KHANDA TA204charset[i++] = c++;205charset[i++] = 0x9dc; // BENGALI LETTER RRA206charset[i++] = 0x9dd; // BENGALI LETTER RHA207c = 0x9df; // from BENGALI LETTER YYA208while (c <= 0x9e3) // ..to BENGALI VOWEL SIGN VOCALIC LL209charset[i++] = c++;210c = 0x9e6; // from BENGALI DIGIT ZERO211while (c <= 0x9fe) // ..to BENGALI SANDHI MARK212charset[i++] = c++;213// 0A00..0A7F; Gurmukhi214charset[i++] = 0xa01; // GURMUKHI SIGN ADAK BINDI215charset[i++] = 0xa03; // GURMUKHI SIGN VISARGA216c = 0xa05; // from GURMUKHI LETTER A217while (c <= 0xa0a) // ..to GURMUKHI LETTER UU218charset[i++] = c++;219charset[i++] = 0xa0f; // GURMUKHI LETTER EE220charset[i++] = 0xa10; // GURMUKHI LETTER AI221c = 0xa13; // from GURMUKHI LETTER OO222while (c <= 0xa28) // ..to GURMUKHI LETTER NA223charset[i++] = c++;224c = 0xa2a; // from GURMUKHI LETTER PA225while (c <= 0xa30) // ..to GURMUKHI LETTER RA226charset[i++] = c++;227charset[i++] = 0xa32; // GURMUKHI LETTER LA228charset[i++] = 0xa33; // GURMUKHI LETTER LLA229charset[i++] = 0xa35; // GURMUKHI LETTER VA230charset[i++] = 0xa36; // GURMUKHI LETTER SHA231charset[i++] = 0xa38; // GURMUKHI LETTER SA232charset[i++] = 0xa39; // GURMUKHI LETTER HA233c = 0xa3e; // from GURMUKHI VOWEL SIGN AA234while (c <= 0xa42) // ..to GURMUKHI VOWEL SIGN UU235charset[i++] = c++;236charset[i++] = 0xa47; // GURMUKHI VOWEL SIGN EE237charset[i++] = 0xa48; // GURMUKHI VOWEL SIGN AI238charset[i++] = 0xa4b; // GURMUKHI VOWEL SIGN OO239charset[i++] = 0xa4d; // GURMUKHI SIGN VIRAMA240c = 0xa59; // from GURMUKHI LETTER KHHA241while (c <= 0xa5c) // ..to GURMUKHI LETTER RRA242charset[i++] = c++;243c = 0xa66; // from GURMUKHI DIGIT ZERO244while (c <= 0xa76) // ..to GURMUKHI ABBREVIATION SIGN245charset[i++] = c++;246// 0A80..0AFF; Gujarati247charset[i++] = 0xa81; // GUJARATI SIGN CANDRABINDU248charset[i++] = 0xa83; // GUJARATI SIGN VISARGA249c = 0xa85; // from GUJARATI LETTER A250while (c <= 0xa8d) // ..to GUJARATI VOWEL CANDRA E251charset[i++] = c++;252charset[i++] = 0xa8f; // GUJARATI LETTER E253charset[i++] = 0xa91; // GUJARATI VOWEL CANDRA O254c = 0xa93; // from GUJARATI LETTER O255while (c <= 0xaa8) // ..to GUJARATI LETTER NA256charset[i++] = c++;257c = 0xaaa; // from GUJARATI LETTER PA258while (c <= 0xab0) // ..to GUJARATI LETTER RA259charset[i++] = c++;260charset[i++] = 0xab2; // GUJARATI LETTER LA261charset[i++] = 0xab3; // GUJARATI LETTER LLA262c = 0xab5; // from GUJARATI LETTER VA263while (c <= 0xab9) // ..to GUJARATI LETTER HA264charset[i++] = c++;265c = 0xabc; // from GUJARATI SIGN NUKTA266while (c <= 0xac5) // ..to GUJARATI VOWEL SIGN CANDRA E267charset[i++] = c++;268charset[i++] = 0xac7; // GUJARATI VOWEL SIGN E269charset[i++] = 0xac9; // GUJARATI VOWEL SIGN CANDRA O270charset[i++] = 0xacb; // GUJARATI VOWEL SIGN O271charset[i++] = 0xacd; // GUJARATI SIGN VIRAMA272c = 0xae0; // from GUJARATI LETTER VOCALIC RR273while (c <= 0xae3) // ..to GUJARATI VOWEL SIGN VOCALIC LL274charset[i++] = c++;275c = 0xae6; // from GUJARATI DIGIT ZERO276while (c <= 0xaf1) // ..to GUJARATI RUPEE SIGN277charset[i++] = c++;278c = 0xaf9; // from GUJARATI LETTER ZHA279while (c <= 0xaff) // ..to GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE280charset[i++] = c++;281// 0B00..0B7F; Oriya282charset[i++] = 0xb01; // ORIYA SIGN CANDRABINDU283charset[i++] = 0xb03; // ORIYA SIGN VISARGA284c = 0xb05; // from ORIYA LETTER A285while (c <= 0xb0c) // ..to ORIYA LETTER VOCALIC L286charset[i++] = c++;287charset[i++] = 0xb0f; // ORIYA LETTER E288charset[i++] = 0xb10; // ORIYA LETTER AI289c = 0xb13; // from ORIYA LETTER O290while (c <= 0xb28) // ..to ORIYA LETTER NA291charset[i++] = c++;292c = 0xb2a; // from ORIYA LETTER PA293while (c <= 0xb30) // ..to ORIYA LETTER RA294charset[i++] = c++;295charset[i++] = 0xb32; // ORIYA LETTER LA296charset[i++] = 0xb33; // ORIYA LETTER LLA297c = 0xb35; // from ORIYA LETTER VA298while (c <= 0xb39) // ..to ORIYA LETTER HA299charset[i++] = c++;300c = 0xb3c; // from ORIYA SIGN NUKTA301while (c <= 0xb44) // ..to ORIYA VOWEL SIGN VOCALIC RR302charset[i++] = c++;303charset[i++] = 0xb47; // ORIYA VOWEL SIGN E304charset[i++] = 0xb48; // ORIYA VOWEL SIGN AI305charset[i++] = 0xb4b; // ORIYA VOWEL SIGN O306charset[i++] = 0xb4d; // ORIYA SIGN VIRAMA307charset[i++] = 0xb55; // ORIYA SIGN OVERLINE308charset[i++] = 0xb57; // ORIYA AU LENGTH MARK309charset[i++] = 0xb5c; // ORIYA LETTER RRA310charset[i++] = 0xb5d; // ORIYA LETTER RHA311c = 0xb5f; // from ORIYA LETTER YYA312while (c <= 0xb63) // ..to ORIYA VOWEL SIGN VOCALIC LL313charset[i++] = c++;314c = 0xb66; // from ORIYA DIGIT ZERO315while (c <= 0xb77) // ..to ORIYA FRACTION THREE SIXTEENTHS316charset[i++] = c++;317// 0B80..0BFF; Tamil318charset[i++] = 0xb82; // TAMIL SIGN ANUSVARA319charset[i++] = 0xb83; // TAMIL SIGN VISARGA320c = 0xb85; // from TAMIL LETTER A321while (c <= 0xb8a) // ..to TAMIL LETTER UU322charset[i++] = c++;323charset[i++] = 0xb8e; // TAMIL LETTER E324charset[i++] = 0xb90; // TAMIL LETTER AI325c = 0xb92; // from TAMIL LETTER O326while (c <= 0xb95) // ..to TAMIL LETTER KA327charset[i++] = c++;328charset[i++] = 0xb99; // TAMIL LETTER NGA329charset[i++] = 0xb9a; // TAMIL LETTER CA330charset[i++] = 0xb9e; // TAMIL LETTER NYA331charset[i++] = 0xb9f; // TAMIL LETTER TTA332charset[i++] = 0xba3; // TAMIL LETTER NNA333charset[i++] = 0xba4; // TAMIL LETTER TA334charset[i++] = 0xba8; // TAMIL LETTER NA335charset[i++] = 0xbaa; // TAMIL LETTER PA336c = 0xbae; // from TAMIL LETTER MA337while (c <= 0xbb9) // ..to TAMIL LETTER HA338charset[i++] = c++;339c = 0xbbe; // from TAMIL VOWEL SIGN AA340while (c <= 0xbc2) // ..to TAMIL VOWEL SIGN UU341charset[i++] = c++;342charset[i++] = 0xbc6; // TAMIL VOWEL SIGN E343charset[i++] = 0xbc8; // TAMIL VOWEL SIGN AI344c = 0xbca; // from TAMIL VOWEL SIGN O345while (c <= 0xbcd) // ..to TAMIL SIGN VIRAMA346charset[i++] = c++;347c = 0xbe6; // from TAMIL DIGIT ZERO348while (c <= 0xbfa) // ..to TAMIL NUMBER SIGN349charset[i++] = c++;350// 0C00..0C7F; Telugu351c = 0xc00; // from TELUGU SIGN COMBINING CANDRABINDU ABOVE352while (c <= 0xc0c) // ..to TELUGU LETTER VOCALIC L353charset[i++] = c++;354charset[i++] = 0xc0e; // TELUGU LETTER E355charset[i++] = 0xc10; // TELUGU LETTER AI356c = 0xc12; // from TELUGU LETTER O357while (c <= 0xc28) // ..to TELUGU LETTER NA358charset[i++] = c++;359c = 0xc2a; // from TELUGU LETTER PA360while (c <= 0xc39) // ..to TELUGU LETTER HA361charset[i++] = c++;362c = 0xc3d; // from TELUGU SIGN AVAGRAHA363while (c <= 0xc44) // ..to TELUGU VOWEL SIGN VOCALIC RR364charset[i++] = c++;365charset[i++] = 0xc46; // TELUGU VOWEL SIGN E366charset[i++] = 0xc48; // TELUGU VOWEL SIGN AI367c = 0xc4a; // from TELUGU VOWEL SIGN O368while (c <= 0xc4d) // ..to TELUGU SIGN VIRAMA369charset[i++] = c++;370charset[i++] = 0xc55; // TELUGU LENGTH MARK371charset[i++] = 0xc56; // TELUGU AI LENGTH MARK372charset[i++] = 0xc58; // TELUGU LETTER TSA373charset[i++] = 0xc5a; // TELUGU LETTER RRRA374c = 0xc60; // from TELUGU LETTER VOCALIC RR375while (c <= 0xc63) // ..to TELUGU VOWEL SIGN VOCALIC LL376charset[i++] = c++;377c = 0xc66; // from TELUGU DIGIT ZERO378while (c <= 0xc6f) // ..to TELUGU DIGIT NINE379charset[i++] = c++;380c = 0xc77; // from TELUGU SIGN SIDDHAM381while (c <= 0xc7f) // ..to TELUGU SIGN TUUMU382charset[i++] = c++;383// 0C80..0CFF; Kannada384c = 0xc80; // from KANNADA SIGN SPACING CANDRABINDU385while (c <= 0xc8c) // ..to KANNADA LETTER VOCALIC L386charset[i++] = c++;387charset[i++] = 0xc8e; // KANNADA LETTER E388charset[i++] = 0xc90; // KANNADA LETTER AI389c = 0xc92; // from KANNADA LETTER O390while (c <= 0xca8) // ..to KANNADA LETTER NA391charset[i++] = c++;392c = 0xcaa; // from KANNADA LETTER PA393while (c <= 0xcb3) // ..to KANNADA LETTER LLA394charset[i++] = c++;395c = 0xcb5; // from KANNADA LETTER VA396while (c <= 0xcb9) // ..to KANNADA LETTER HA397charset[i++] = c++;398c = 0xcbc; // from KANNADA SIGN NUKTA399while (c <= 0xcc4) // ..to KANNADA VOWEL SIGN VOCALIC RR400charset[i++] = c++;401charset[i++] = 0xcc6; // KANNADA VOWEL SIGN E402charset[i++] = 0xcc8; // KANNADA VOWEL SIGN AI403c = 0xcca; // from KANNADA VOWEL SIGN O404while (c <= 0xccd) // ..to KANNADA SIGN VIRAMA405charset[i++] = c++;406charset[i++] = 0xcd5; // KANNADA LENGTH MARK407charset[i++] = 0xcd6; // KANNADA AI LENGTH MARK408c = 0xce0; // from KANNADA LETTER VOCALIC RR409while (c <= 0xce3) // ..to KANNADA VOWEL SIGN VOCALIC LL410charset[i++] = c++;411c = 0xce6; // from KANNADA DIGIT ZERO412while (c <= 0xcef) // ..to KANNADA DIGIT NINE413charset[i++] = c++;414charset[i++] = 0xcf1; // KANNADA SIGN JIHVAMULIYA415charset[i++] = 0xcf2; // KANNADA SIGN UPADHMANIYA416// 0D00..0D7F; Malayalam417c = 0xd00; // from MALAYALAM SIGN COMBINING ANUSVARA ABOVE418while (c <= 0xd0c) // ..to MALAYALAM LETTER VOCALIC L419charset[i++] = c++;420charset[i++] = 0xd0e; // MALAYALAM LETTER E421charset[i++] = 0xd10; // MALAYALAM LETTER AI422c = 0xd12; // from MALAYALAM LETTER O423while (c <= 0xd44) // ..to MALAYALAM VOWEL SIGN VOCALIC RR424charset[i++] = c++;425charset[i++] = 0xd46; // MALAYALAM VOWEL SIGN E426charset[i++] = 0xd48; // MALAYALAM VOWEL SIGN AI427c = 0xd4a; // from MALAYALAM VOWEL SIGN O428while (c <= 0xd4f) // ..to MALAYALAM SIGN PARA429charset[i++] = c++;430c = 0xd54; // from MALAYALAM LETTER CHILLU M431while (c <= 0xd63) // ..to MALAYALAM VOWEL SIGN VOCALIC LL432charset[i++] = c++;433c = 0xd66; // from MALAYALAM DIGIT ZERO434while (c <= 0xd7f) // ..to MALAYALAM LETTER CHILLU K435charset[i++] = c++;436// 0D80..0DFF; Sinhala437charset[i++] = 0xd81; // SINHALA SIGN CANDRABINDU438charset[i++] = 0xd83; // SINHALA SIGN VISARGAYA439c = 0xd85; // from SINHALA LETTER AYANNA440while (c <= 0xd96) // ..to SINHALA LETTER AUYANNA441charset[i++] = c++;442c = 0xd9a; // from SINHALA LETTER ALPAPRAANA KAYANNA443while (c <= 0xdb1) // ..to SINHALA LETTER DANTAJA NAYANNA444charset[i++] = c++;445c = 0xdb3; // from SINHALA LETTER SANYAKA DAYANNA446while (c <= 0xdbb) // ..to SINHALA LETTER RAYANNA447charset[i++] = c++;448c = 0xdc0; // from SINHALA LETTER VAYANNA449while (c <= 0xdc6) // ..to SINHALA LETTER FAYANNA450charset[i++] = c++;451c = 0xdcf; // from SINHALA VOWEL SIGN AELA-PILLA452while (c <= 0xdd4) // ..to SINHALA VOWEL SIGN KETTI PAA-PILLA453charset[i++] = c++;454c = 0xdd8; // from SINHALA VOWEL SIGN GAETTA-PILLA455while (c <= 0xddf) // ..to SINHALA VOWEL SIGN GAYANUKITTA456charset[i++] = c++;457c = 0xde6; // from SINHALA LITH DIGIT ZERO458while (c <= 0xdef) // ..to SINHALA LITH DIGIT NINE459charset[i++] = c++;460charset[i++] = 0xdf2; // SINHALA VOWEL SIGN DIGA GAETTA-PILLA461charset[i++] = 0xdf4; // SINHALA PUNCTUATION KUNDDALIYA462// 0E00..0E7F; Thai463c = 0xe01; // from THAI CHARACTER KO KAI464while (c <= 0xe3a) // ..to THAI CHARACTER PHINTHU465charset[i++] = c++;466c = 0xe3f; // from THAI CURRENCY SYMBOL BAHT467while (c <= 0xe5b) // ..to THAI CHARACTER KHOMUT468charset[i++] = c++;469// 0E80..0EFF; Lao470charset[i++] = 0xe81; // LAO LETTER KO471charset[i++] = 0xe82; // LAO LETTER KHO SUNG472c = 0xe86; // from LAO LETTER PALI GHA473while (c <= 0xe8a) // ..to LAO LETTER SO TAM474charset[i++] = c++;475c = 0xe8c; // from LAO LETTER PALI JHA476while (c <= 0xea3) // ..to LAO LETTER LO LING477charset[i++] = c++;478c = 0xea7; // from LAO LETTER WO479while (c <= 0xebd) // ..to LAO SEMIVOWEL SIGN NYO480charset[i++] = c++;481c = 0xec0; // from LAO VOWEL SIGN E482while (c <= 0xec4) // ..to LAO VOWEL SIGN AI483charset[i++] = c++;484c = 0xec8; // from LAO TONE MAI EK485while (c <= 0xecd) // ..to LAO NIGGAHITA486charset[i++] = c++;487c = 0xed0; // from LAO DIGIT ZERO488while (c <= 0xed9) // ..to LAO DIGIT NINE489charset[i++] = c++;490c = 0xedc; // from LAO HO NO491while (c <= 0xedf) // ..to LAO LETTER KHMU NYO492charset[i++] = c++;493// 0F00..0FFF; Tibetan494c = 0xf00; // from TIBETAN SYLLABLE OM495while (c <= 0xf47) // ..to TIBETAN LETTER JA496charset[i++] = c++;497c = 0xf49; // from TIBETAN LETTER NYA498while (c <= 0xf6c) // ..to TIBETAN LETTER RRA499charset[i++] = c++;500c = 0xf71; // from TIBETAN VOWEL SIGN AA501while (c <= 0xf97) // ..to TIBETAN SUBJOINED LETTER JA502charset[i++] = c++;503c = 0xf99; // from TIBETAN SUBJOINED LETTER NYA504while (c <= 0xfbc) // ..to TIBETAN SUBJOINED LETTER FIXED-FORM RA505charset[i++] = c++;506c = 0xfbe; // from TIBETAN KU RU KHA507while (c <= 0xfcc) // ..to TIBETAN SYMBOL NOR BU BZHI -KHYIL508charset[i++] = c++;509c = 0xfce; // from TIBETAN SIGN RDEL NAG RDEL DKAR510while (c <= 0xfda) // ..to TIBETAN MARK TRAILING MCHAN RTAGS511charset[i++] = c++;512// 1000..109F; Myanmar513c = 0x1000; // from MYANMAR LETTER KA514while (c <= 0x109f) // ..to MYANMAR SYMBOL SHAN EXCLAMATION515charset[i++] = c++;516// 10A0..10FF; Georgian517c = 0x10a0; // from GEORGIAN CAPITAL LETTER AN518while (c <= 0x10c5) // ..to GEORGIAN CAPITAL LETTER HOE519charset[i++] = c++;520c = 0x10d0; // from GEORGIAN LETTER AN521while (c <= 0x10ff) // ..to GEORGIAN LETTER LABIAL SIGN522charset[i++] = c++;523// 1100..11FF; Hangul Jamo524c = 0x1100; // from HANGUL CHOSEONG KIYEOK525while (c <= 0x11ff) // ..to HANGUL JONGSEONG SSANGNIEUN526charset[i++] = c++;527// 1200..137F; Ethiopic528c = 0x1200; // from ETHIOPIC SYLLABLE HA529while (c <= 0x1248) // ..to ETHIOPIC SYLLABLE QWA530charset[i++] = c++;531c = 0x124a; // from ETHIOPIC SYLLABLE QWI532while (c <= 0x124d) // ..to ETHIOPIC SYLLABLE QWE533charset[i++] = c++;534c = 0x1250; // from ETHIOPIC SYLLABLE QHA535while (c <= 0x1256) // ..to ETHIOPIC SYLLABLE QHO536charset[i++] = c++;537c = 0x125a; // from ETHIOPIC SYLLABLE QHWI538while (c <= 0x125d) // ..to ETHIOPIC SYLLABLE QHWE539charset[i++] = c++;540c = 0x1260; // from ETHIOPIC SYLLABLE BA541while (c <= 0x1288) // ..to ETHIOPIC SYLLABLE XWA542charset[i++] = c++;543c = 0x128a; // from ETHIOPIC SYLLABLE XWI544while (c <= 0x128d) // ..to ETHIOPIC SYLLABLE XWE545charset[i++] = c++;546c = 0x1290; // from ETHIOPIC SYLLABLE NA547while (c <= 0x12b0) // ..to ETHIOPIC SYLLABLE KWA548charset[i++] = c++;549c = 0x12b2; // from ETHIOPIC SYLLABLE KWI550while (c <= 0x12b5) // ..to ETHIOPIC SYLLABLE KWE551charset[i++] = c++;552c = 0x12b8; // from ETHIOPIC SYLLABLE KXA553while (c <= 0x12be) // ..to ETHIOPIC SYLLABLE KXO554charset[i++] = c++;555c = 0x12c2; // from ETHIOPIC SYLLABLE KXWI556while (c <= 0x12c5) // ..to ETHIOPIC SYLLABLE KXWE557charset[i++] = c++;558c = 0x12c8; // from ETHIOPIC SYLLABLE WA559while (c <= 0x12d6) // ..to ETHIOPIC SYLLABLE PHARYNGEAL O560charset[i++] = c++;561c = 0x12d8; // from ETHIOPIC SYLLABLE ZA562while (c <= 0x1310) // ..to ETHIOPIC SYLLABLE GWA563charset[i++] = c++;564c = 0x1312; // from ETHIOPIC SYLLABLE GWI565while (c <= 0x1315) // ..to ETHIOPIC SYLLABLE GWE566charset[i++] = c++;567c = 0x1318; // from ETHIOPIC SYLLABLE GGA568while (c <= 0x135a) // ..to ETHIOPIC SYLLABLE FYA569charset[i++] = c++;570c = 0x135d; // from ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK571while (c <= 0x137c) // ..to ETHIOPIC NUMBER TEN THOUSAND572charset[i++] = c++;573// 1380..139F; Ethiopic Supplement574c = 0x1380; // from ETHIOPIC SYLLABLE SEBATBEIT MWA575while (c <= 0x1399) // ..to ETHIOPIC TONAL MARK KURT576charset[i++] = c++;577// 13A0..13FF; Cherokee578c = 0x13a0; // from CHEROKEE LETTER A579while (c <= 0x13f5) // ..to CHEROKEE LETTER MV580charset[i++] = c++;581c = 0x13f8; // from CHEROKEE SMALL LETTER YE582while (c <= 0x13fd) // ..to CHEROKEE SMALL LETTER MV583charset[i++] = c++;584// 1400..167F; Unified Canadian Aboriginal Syllabics585c = 0x1400; // from CANADIAN SYLLABICS HYPHEN586while (c <= 0x167f) // ..to CANADIAN SYLLABICS BLACKFOOT W587charset[i++] = c++;588// 1680..169F; Ogham589c = 0x1680; // from OGHAM SPACE MARK590while (c <= 0x169c) // ..to OGHAM REVERSED FEATHER MARK591charset[i++] = c++;592// 16A0..16FF; Runic593c = 0x16a0; // from RUNIC LETTER FEHU FEOH FE F594while (c <= 0x16f8) // ..to RUNIC LETTER FRANKS CASKET AESC595charset[i++] = c++;596// 1700..171F; Tagalog597c = 0x1700; // from TAGALOG LETTER A598while (c <= 0x170c) // ..to TAGALOG LETTER YA599charset[i++] = c++;600c = 0x170e; // from TAGALOG LETTER LA601while (c <= 0x1714) // ..to TAGALOG SIGN VIRAMA602charset[i++] = c++;603// 1720..173F; Hanunoo604c = 0x1720; // from HANUNOO LETTER A605while (c <= 0x1736) // ..to PHILIPPINE DOUBLE PUNCTUATION606charset[i++] = c++;607// 1740..175F; Buhid608c = 0x1740; // from BUHID LETTER A609while (c <= 0x1753) // ..to BUHID VOWEL SIGN U610charset[i++] = c++;611// 1760..177F; Tagbanwa612c = 0x1760; // from TAGBANWA LETTER A613while (c <= 0x176c) // ..to TAGBANWA LETTER YA614charset[i++] = c++;615charset[i++] = 0x176e; // TAGBANWA LETTER LA616charset[i++] = 0x1770; // TAGBANWA LETTER SA617charset[i++] = 0x1772; // TAGBANWA VOWEL SIGN I618charset[i++] = 0x1773; // TAGBANWA VOWEL SIGN U619// 1780..17FF; Khmer620c = 0x1780; // from KHMER LETTER KA621while (c <= 0x17dd) // ..to KHMER SIGN ATTHACAN622charset[i++] = c++;623c = 0x17e0; // from KHMER DIGIT ZERO624while (c <= 0x17e9) // ..to KHMER DIGIT NINE625charset[i++] = c++;626c = 0x17f0; // from KHMER SYMBOL LEK ATTAK SON627while (c <= 0x17f9) // ..to KHMER SYMBOL LEK ATTAK PRAM-BUON628charset[i++] = c++;629// 1800..18AF; Mongolian630c = 0x1800; // from MONGOLIAN BIRGA631while (c <= 0x180e) // ..to MONGOLIAN VOWEL SEPARATOR632charset[i++] = c++;633c = 0x1810; // from MONGOLIAN DIGIT ZERO634while (c <= 0x1819) // ..to MONGOLIAN DIGIT NINE635charset[i++] = c++;636c = 0x1820; // from MONGOLIAN LETTER A637while (c <= 0x1878) // ..to MONGOLIAN LETTER CHA WITH TWO DOTS638charset[i++] = c++;639c = 0x1880; // from MONGOLIAN LETTER ALI GALI ANUSVARA ONE640while (c <= 0x18aa) // ..to MONGOLIAN LETTER MANCHU ALI GALI LHA641charset[i++] = c++;642// 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended643c = 0x18b0; // from CANADIAN SYLLABICS OY644while (c <= 0x18f5) // ..to CANADIAN SYLLABICS CARRIER DENTAL S645charset[i++] = c++;646// 1900..194F; Limbu647c = 0x1900; // from LIMBU VOWEL-CARRIER LETTER648while (c <= 0x191e) // ..to LIMBU LETTER TRA649charset[i++] = c++;650c = 0x1920; // from LIMBU VOWEL SIGN A651while (c <= 0x192b) // ..to LIMBU SUBJOINED LETTER WA652charset[i++] = c++;653c = 0x1930; // from LIMBU SMALL LETTER KA654while (c <= 0x193b) // ..to LIMBU SIGN SA-I655charset[i++] = c++;656c = 0x1944; // from LIMBU EXCLAMATION MARK657while (c <= 0x194f) // ..to LIMBU DIGIT NINE658charset[i++] = c++;659// 1950..197F; Tai Le660c = 0x1950; // from TAI LE LETTER KA661while (c <= 0x196d) // ..to TAI LE LETTER AI662charset[i++] = c++;663c = 0x1970; // from TAI LE LETTER TONE-2664while (c <= 0x1974) // ..to TAI LE LETTER TONE-6665charset[i++] = c++;666// 1980..19DF; New Tai Lue667c = 0x1980; // from NEW TAI LUE LETTER HIGH QA668while (c <= 0x19ab) // ..to NEW TAI LUE LETTER LOW SUA669charset[i++] = c++;670c = 0x19b0; // from NEW TAI LUE VOWEL SIGN VOWEL SHORTENER671while (c <= 0x19c9) // ..to NEW TAI LUE TONE MARK-2672charset[i++] = c++;673c = 0x19d0; // from NEW TAI LUE DIGIT ZERO674while (c <= 0x19da) // ..to NEW TAI LUE THAM DIGIT ONE675charset[i++] = c++;676charset[i++] = 0x19de; // NEW TAI LUE SIGN LAE677charset[i++] = 0x19df; // NEW TAI LUE SIGN LAEV678// 19E0..19FF; Khmer Symbols679c = 0x19e0; // from KHMER SYMBOL PATHAMASAT680while (c <= 0x19ff) // ..to KHMER SYMBOL DAP-PRAM ROC681charset[i++] = c++;682// 1A00..1A1F; Buginese683c = 0x1a00; // from BUGINESE LETTER KA684while (c <= 0x1a1b) // ..to BUGINESE VOWEL SIGN AE685charset[i++] = c++;686charset[i++] = 0x1a1e; // BUGINESE PALLAWA687charset[i++] = 0x1a1f; // BUGINESE END OF SECTION688// 1A20..1AAF; Tai Tham689c = 0x1a20; // from TAI THAM LETTER HIGH KA690while (c <= 0x1a5e) // ..to TAI THAM CONSONANT SIGN SA691charset[i++] = c++;692c = 0x1a60; // from TAI THAM SIGN SAKOT693while (c <= 0x1a7c) // ..to TAI THAM SIGN KHUEN-LUE KARAN694charset[i++] = c++;695c = 0x1a7f; // from TAI THAM COMBINING CRYPTOGRAMMIC DOT696while (c <= 0x1a89) // ..to TAI THAM HORA DIGIT NINE697charset[i++] = c++;698c = 0x1a90; // from TAI THAM THAM DIGIT ZERO699while (c <= 0x1a99) // ..to TAI THAM THAM DIGIT NINE700charset[i++] = c++;701c = 0x1aa0; // from TAI THAM SIGN WIANG702while (c <= 0x1aad) // ..to TAI THAM SIGN CAANG703charset[i++] = c++;704// 1AB0..1AFF; Combining Diacritical Marks Extended705c = 0x1ab0; // from COMBINING DOUBLED CIRCUMFLEX ACCENT706while (c <= 0x1ac0) // ..to COMBINING LATIN SMALL LETTER TURNED W BELOW707charset[i++] = c++;708// 1B00..1B7F; Balinese709c = 0x1b00; // from BALINESE SIGN ULU RICEM710while (c <= 0x1b4b) // ..to BALINESE LETTER ASYURA SASAK711charset[i++] = c++;712c = 0x1b50; // from BALINESE DIGIT ZERO713while (c <= 0x1b7c) // ..to BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING714charset[i++] = c++;715// 1B80..1BBF; Sundanese716c = 0x1b80; // from SUNDANESE SIGN PANYECEK717while (c <= 0x1bbf) // ..to SUNDANESE LETTER FINAL M718charset[i++] = c++;719// 1BC0..1BFF; Batak720c = 0x1bc0; // from BATAK LETTER A721while (c <= 0x1bf3) // ..to BATAK PANONGONAN722charset[i++] = c++;723c = 0x1bfc; // from BATAK SYMBOL BINDU NA METEK724while (c <= 0x1bff) // ..to BATAK SYMBOL BINDU PANGOLAT725charset[i++] = c++;726// 1C00..1C4F; Lepcha727c = 0x1c00; // from LEPCHA LETTER KA728while (c <= 0x1c37) // ..to LEPCHA SIGN NUKTA729charset[i++] = c++;730c = 0x1c3b; // from LEPCHA PUNCTUATION TA-ROL731while (c <= 0x1c49) // ..to LEPCHA DIGIT NINE732charset[i++] = c++;733charset[i++] = 0x1c4d; // LEPCHA LETTER TTA734charset[i++] = 0x1c4f; // LEPCHA LETTER DDA735// 1C50..1C7F; Ol Chiki736c = 0x1c50; // from OL CHIKI DIGIT ZERO737while (c <= 0x1c7f) // ..to OL CHIKI PUNCTUATION DOUBLE MUCAAD738charset[i++] = c++;739// 1C80..1C8F; Cyrillic Extended-C740c = 0x1c80; // from CYRILLIC SMALL LETTER ROUNDED VE741while (c <= 0x1c88) // ..to CYRILLIC SMALL LETTER UNBLENDED UK742charset[i++] = c++;743// 1C90..1CBF; Georgian Extended744c = 0x1c90; // from GEORGIAN MTAVRULI CAPITAL LETTER AN745while (c <= 0x1cba) // ..to GEORGIAN MTAVRULI CAPITAL LETTER AIN746charset[i++] = c++;747charset[i++] = 0x1cbd; // GEORGIAN MTAVRULI CAPITAL LETTER AEN748charset[i++] = 0x1cbf; // GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN749// 1CC0..1CCF; Sundanese Supplement750c = 0x1cc0; // from SUNDANESE PUNCTUATION BINDU SURYA751while (c <= 0x1cc7) // ..to SUNDANESE PUNCTUATION BINDU BA SATANGA752charset[i++] = c++;753// 1CD0..1CFF; Vedic Extensions754c = 0x1cd0; // from VEDIC TONE KARSHANA755while (c <= 0x1cfa) // ..to VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA756charset[i++] = c++;757// 1D00..1D7F; Phonetic Extensions758c = 0x1d00; // from LATIN LETTER SMALL CAPITAL A759while (c <= 0x1d7f) // ..to LATIN SMALL LETTER UPSILON WITH STROKE760charset[i++] = c++;761// 1D80..1DBF; Phonetic Extensions Supplement762c = 0x1d80; // from LATIN SMALL LETTER B WITH PALATAL HOOK763while (c <= 0x1dbf) // ..to MODIFIER LETTER SMALL THETA764charset[i++] = c++;765// 1DC0..1DFF; Combining Diacritical Marks Supplement766c = 0x1dc0; // from COMBINING DOTTED GRAVE ACCENT767while (c <= 0x1df9) // ..to COMBINING WIDE INVERTED BRIDGE BELOW768charset[i++] = c++;769c = 0x1dfb; // from COMBINING DELETION MARK770while (c <= 0x1dff) // ..to COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW771charset[i++] = c++;772// 1E00..1EFF; Latin Extended Additional773c = 0x1e00; // from LATIN CAPITAL LETTER A WITH RING BELOW774while (c <= 0x1eff) // ..to LATIN SMALL LETTER Y WITH LOOP775charset[i++] = c++;776// 1F00..1FFF; Greek Extended777c = 0x1f00; // from GREEK SMALL LETTER ALPHA WITH PSILI778while (c <= 0x1f15) // ..to GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA779charset[i++] = c++;780c = 0x1f18; // from GREEK CAPITAL LETTER EPSILON WITH PSILI781while (c <= 0x1f1d) // ..to GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA782charset[i++] = c++;783c = 0x1f20; // from GREEK SMALL LETTER ETA WITH PSILI784while (c <= 0x1f45) // ..to GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA785charset[i++] = c++;786c = 0x1f48; // from GREEK CAPITAL LETTER OMICRON WITH PSILI787while (c <= 0x1f4d) // ..to GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA788charset[i++] = c++;789c = 0x1f50; // from GREEK SMALL LETTER UPSILON WITH PSILI790while (c <= 0x1f57) // ..to GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI791charset[i++] = c++;792c = 0x1f5f; // from GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI793while (c <= 0x1f7d) // ..to GREEK SMALL LETTER OMEGA WITH OXIA794charset[i++] = c++;795c = 0x1f80; // from GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI796while (c <= 0x1fb4) // ..to GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI797charset[i++] = c++;798c = 0x1fb6; // from GREEK SMALL LETTER ALPHA WITH PERISPOMENI799while (c <= 0x1fc4) // ..to GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI800charset[i++] = c++;801c = 0x1fc6; // from GREEK SMALL LETTER ETA WITH PERISPOMENI802while (c <= 0x1fd3) // ..to GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA803charset[i++] = c++;804c = 0x1fd6; // from GREEK SMALL LETTER IOTA WITH PERISPOMENI805while (c <= 0x1fdb) // ..to GREEK CAPITAL LETTER IOTA WITH OXIA806charset[i++] = c++;807c = 0x1fdd; // from GREEK DASIA AND VARIA808while (c <= 0x1fef) // ..to GREEK VARIA809charset[i++] = c++;810charset[i++] = 0x1ff2; // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI811charset[i++] = 0x1ff4; // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI812c = 0x1ff6; // from GREEK SMALL LETTER OMEGA WITH PERISPOMENI813while (c <= 0x1ffe) // ..to GREEK DASIA814charset[i++] = c++;815// 2000..206F; General Punctuation816c = 0x2000; // from EN QUAD817while (c <= 0x2064) // ..to INVISIBLE PLUS818charset[i++] = c++;819c = 0x2066; // from LEFT-TO-RIGHT ISOLATE820while (c <= 0x206f) // ..to NOMINAL DIGIT SHAPES821charset[i++] = c++;822// 2070..209F; Superscripts and Subscripts823charset[i++] = 0x2070; // SUPERSCRIPT ZERO824charset[i++] = 0x2071; // SUPERSCRIPT LATIN SMALL LETTER I825c = 0x2074; // from SUPERSCRIPT FOUR826while (c <= 0x208e) // ..to SUBSCRIPT RIGHT PARENTHESIS827charset[i++] = c++;828c = 0x2090; // from LATIN SUBSCRIPT SMALL LETTER A829while (c <= 0x209c) // ..to LATIN SUBSCRIPT SMALL LETTER T830charset[i++] = c++;831// 20A0..20CF; Currency Symbols832c = 0x20a0; // from EURO-CURRENCY SIGN833while (c <= 0x20bf) // ..to BITCOIN SIGN834charset[i++] = c++;835// 20D0..20FF; Combining Diacritical Marks for Symbols836c = 0x20d0; // from COMBINING LEFT HARPOON ABOVE837while (c <= 0x20f0) // ..to COMBINING ASTERISK ABOVE838charset[i++] = c++;839// 2100..214F; Letterlike Symbols840c = 0x2100; // from ACCOUNT OF841while (c <= 0x214f) // ..to SYMBOL FOR SAMARITAN SOURCE842charset[i++] = c++;843// 2150..218F; Number Forms844c = 0x2150; // from VULGAR FRACTION ONE SEVENTH845while (c <= 0x218b) // ..to TURNED DIGIT THREE846charset[i++] = c++;847// 2190..21FF; Arrows848c = 0x2190; // from LEFTWARDS ARROW849while (c <= 0x21ff) // ..to LEFT RIGHT OPEN-HEADED ARROW850charset[i++] = c++;851// 2200..22FF; Mathematical Operators852c = 0x2200; // from FOR ALL853while (c <= 0x22ff) // ..to Z NOTATION BAG MEMBERSHIP854charset[i++] = c++;855// 2300..23FF; Miscellaneous Technical856c = 0x2300; // from DIAMETER SIGN857while (c <= 0x23ff) // ..to OBSERVER EYE SYMBOL858charset[i++] = c++;859// 2400..243F; Control Pictures860c = 0x2400; // from SYMBOL FOR NULL861while (c <= 0x2426) // ..to SYMBOL FOR SUBSTITUTE FORM TWO862charset[i++] = c++;863// 2440..245F; Optical Character Recognition864c = 0x2440; // from OCR HOOK865while (c <= 0x244a) // ..to OCR DOUBLE BACKSLASH866charset[i++] = c++;867// 2460..24FF; Enclosed Alphanumerics868c = 0x2460; // from CIRCLED DIGIT ONE869while (c <= 0x24ff) // ..to NEGATIVE CIRCLED DIGIT ZERO870charset[i++] = c++;871// 2500..257F; Box Drawing872c = 0x2500; // from BOX DRAWINGS LIGHT HORIZONTAL873while (c <= 0x257f) // ..to BOX DRAWINGS HEAVY UP AND LIGHT DOWN874charset[i++] = c++;875// 2580..259F; Block Elements876c = 0x2580; // from UPPER HALF BLOCK877while (c <= 0x259f) // ..to QUADRANT UPPER RIGHT AND LOWER LEFT AND LOWER RIGHT878charset[i++] = c++;879// 25A0..25FF; Geometric Shapes880c = 0x25a0; // from BLACK SQUARE881while (c <= 0x25ff) // ..to LOWER RIGHT TRIANGLE882charset[i++] = c++;883// 2600..26FF; Miscellaneous Symbols884c = 0x2600; // from BLACK SUN WITH RAYS885while (c <= 0x26ff) // ..to WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE886charset[i++] = c++;887// 2700..27BF; Dingbats888c = 0x2700; // from BLACK SAFETY SCISSORS889while (c <= 0x27bf) // ..to DOUBLE CURLY LOOP890charset[i++] = c++;891// 27C0..27EF; Miscellaneous Mathematical Symbols-A892c = 0x27c0; // from THREE DIMENSIONAL ANGLE893while (c <= 0x27ef) // ..to MATHEMATICAL RIGHT FLATTENED PARENTHESIS894charset[i++] = c++;895// 27F0..27FF; Supplemental Arrows-A896c = 0x27f0; // from UPWARDS QUADRUPLE ARROW897while (c <= 0x27ff) // ..to LONG RIGHTWARDS SQUIGGLE ARROW898charset[i++] = c++;899// 2800..28FF; Braille Patterns900c = 0x2800; // from BRAILLE PATTERN BLANK901while (c <= 0x28ff) // ..to BRAILLE PATTERN DOTS-12345678902charset[i++] = c++;903// 2900..297F; Supplemental Arrows-B904c = 0x2900; // from RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE905while (c <= 0x297f) // ..to DOWN FISH TAIL906charset[i++] = c++;907// 2980..29FF; Miscellaneous Mathematical Symbols-B908c = 0x2980; // from TRIPLE VERTICAL BAR DELIMITER909while (c <= 0x29ff) // ..to MINY910charset[i++] = c++;911// 2A00..2AFF; Supplemental Mathematical Operators912c = 0x2a00; // from N-ARY CIRCLED DOT OPERATOR913while (c <= 0x2aff) // ..to N-ARY WHITE VERTICAL BAR914charset[i++] = c++;915// 2B00..2BFF; Miscellaneous Symbols and Arrows916c = 0x2b00; // from NORTH EAST WHITE ARROW917while (c <= 0x2b73) // ..to DOWNWARDS TRIANGLE-HEADED ARROW TO BAR918charset[i++] = c++;919c = 0x2b76; // from NORTH WEST TRIANGLE-HEADED ARROW TO BAR920while (c <= 0x2b95) // ..to RIGHTWARDS BLACK ARROW921charset[i++] = c++;922c = 0x2b97; // from SYMBOL FOR TYPE A ELECTRONICS923while (c <= 0x2bff) // ..to HELLSCHREIBER PAUSE SYMBOL924charset[i++] = c++;925// 2C00..2C5F; Glagolitic926c = 0x2c00; // from GLAGOLITIC CAPITAL LETTER AZU927while (c <= 0x2c2e) // ..to GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE928charset[i++] = c++;929c = 0x2c30; // from GLAGOLITIC SMALL LETTER AZU930while (c <= 0x2c5e) // ..to GLAGOLITIC SMALL LETTER LATINATE MYSLITE931charset[i++] = c++;932// 2C60..2C7F; Latin Extended-C933c = 0x2c60; // from LATIN CAPITAL LETTER L WITH DOUBLE BAR934while (c <= 0x2c7f) // ..to LATIN CAPITAL LETTER Z WITH SWASH TAIL935charset[i++] = c++;936// 2C80..2CFF; Coptic937c = 0x2c80; // from COPTIC CAPITAL LETTER ALFA938while (c <= 0x2cf3) // ..to COPTIC SMALL LETTER BOHAIRIC KHEI939charset[i++] = c++;940c = 0x2cf9; // from COPTIC OLD NUBIAN FULL STOP941while (c <= 0x2cff) // ..to COPTIC MORPHOLOGICAL DIVIDER942charset[i++] = c++;943// 2D00..2D2F; Georgian Supplement944c = 0x2d00; // from GEORGIAN SMALL LETTER AN945while (c <= 0x2d25) // ..to GEORGIAN SMALL LETTER HOE946charset[i++] = c++;947c = 0x2d27; // from GEORGIAN SMALL LETTER YN948while (c <= 0x2d2d) // ..to GEORGIAN SMALL LETTER AEN949charset[i++] = c++;950// 2D30..2D7F; Tifinagh951c = 0x2d30; // from TIFINAGH LETTER YA952while (c <= 0x2d67) // ..to TIFINAGH LETTER YO953charset[i++] = c++;954charset[i++] = 0x2d6f; // TIFINAGH MODIFIER LETTER LABIALIZATION MARK955charset[i++] = 0x2d70; // TIFINAGH SEPARATOR MARK956charset[i++] = 0x2d7f; // TIFINAGH CONSONANT JOINER957// 2D80..2DDF; Ethiopic Extended958c = 0x2d80; // from ETHIOPIC SYLLABLE LOA959while (c <= 0x2d96) // ..to ETHIOPIC SYLLABLE GGWE960charset[i++] = c++;961c = 0x2da0; // from ETHIOPIC SYLLABLE SSA962while (c <= 0x2da6) // ..to ETHIOPIC SYLLABLE SSO963charset[i++] = c++;964c = 0x2da8; // from ETHIOPIC SYLLABLE CCA965while (c <= 0x2dae) // ..to ETHIOPIC SYLLABLE CCO966charset[i++] = c++;967c = 0x2db0; // from ETHIOPIC SYLLABLE ZZA968while (c <= 0x2db6) // ..to ETHIOPIC SYLLABLE ZZO969charset[i++] = c++;970c = 0x2db8; // from ETHIOPIC SYLLABLE CCHA971while (c <= 0x2dbe) // ..to ETHIOPIC SYLLABLE CCHO972charset[i++] = c++;973c = 0x2dc0; // from ETHIOPIC SYLLABLE QYA974while (c <= 0x2dc6) // ..to ETHIOPIC SYLLABLE QYO975charset[i++] = c++;976c = 0x2dc8; // from ETHIOPIC SYLLABLE KYA977while (c <= 0x2dce) // ..to ETHIOPIC SYLLABLE KYO978charset[i++] = c++;979c = 0x2dd0; // from ETHIOPIC SYLLABLE XYA980while (c <= 0x2dd6) // ..to ETHIOPIC SYLLABLE XYO981charset[i++] = c++;982c = 0x2dd8; // from ETHIOPIC SYLLABLE GYA983while (c <= 0x2dde) // ..to ETHIOPIC SYLLABLE GYO984charset[i++] = c++;985// 2DE0..2DFF; Cyrillic Extended-A986c = 0x2de0; // from COMBINING CYRILLIC LETTER BE987while (c <= 0x2dff) // ..to COMBINING CYRILLIC LETTER IOTIFIED BIG YUS988charset[i++] = c++;989// 2E00..2E7F; Supplemental Punctuation990c = 0x2e00; // from RIGHT ANGLE SUBSTITUTION MARKER991while (c <= 0x2e52) // ..to TIRONIAN SIGN CAPITAL ET992charset[i++] = c++;993// 2E80..2EFF; CJK Radicals Supplement994c = 0x2e80; // from CJK RADICAL REPEAT995while (c <= 0x2e99) // ..to CJK RADICAL RAP996charset[i++] = c++;997c = 0x2e9b; // from CJK RADICAL CHOKE998while (c <= 0x2ef3) // ..to CJK RADICAL C-SIMPLIFIED TURTLE999charset[i++] = c++;1000// 2F00..2FDF; Kangxi Radicals1001c = 0x2f00; // from KANGXI RADICAL ONE1002while (c <= 0x2fd5) // ..to KANGXI RADICAL FLUTE1003charset[i++] = c++;1004// 2FF0..2FFF; Ideographic Description Characters1005c = 0x2ff0; // from IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT1006while (c <= 0x2ffb) // ..to IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID1007charset[i++] = c++;1008// 3000..303F; CJK Symbols and Punctuation1009c = 0x3000; // from IDEOGRAPHIC SPACE1010while (c <= 0x303f) // ..to IDEOGRAPHIC HALF FILL SPACE1011charset[i++] = c++;1012// 3040..309F; Hiragana1013c = 0x3041; // from HIRAGANA LETTER SMALL A1014while (c <= 0x3096) // ..to HIRAGANA LETTER SMALL KE1015charset[i++] = c++;1016c = 0x3099; // from COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK1017while (c <= 0x309f) // ..to HIRAGANA DIGRAPH YORI1018charset[i++] = c++;1019// 30A0..30FF; Katakana1020c = 0x30a0; // from KATAKANA-HIRAGANA DOUBLE HYPHEN1021while (c <= 0x30ff) // ..to KATAKANA DIGRAPH KOTO1022charset[i++] = c++;1023// 3100..312F; Bopomofo1024c = 0x3105; // from BOPOMOFO LETTER B1025while (c <= 0x312f) // ..to BOPOMOFO LETTER NN1026charset[i++] = c++;1027// 3130..318F; Hangul Compatibility Jamo1028c = 0x3131; // from HANGUL LETTER KIYEOK1029while (c <= 0x318e) // ..to HANGUL LETTER ARAEAE1030charset[i++] = c++;1031// 3190..319F; Kanbun1032c = 0x3190; // from IDEOGRAPHIC ANNOTATION LINKING MARK1033while (c <= 0x319f) // ..to IDEOGRAPHIC ANNOTATION MAN MARK1034charset[i++] = c++;1035// 31A0..31BF; Bopomofo Extended1036c = 0x31a0; // from BOPOMOFO LETTER BU1037while (c <= 0x31bf) // ..to BOPOMOFO LETTER AH1038charset[i++] = c++;1039// 31C0..31EF; CJK Strokes1040c = 0x31c0; // from CJK STROKE T1041while (c <= 0x31e3) // ..to CJK STROKE Q1042charset[i++] = c++;1043// 31F0..31FF; Katakana Phonetic Extensions1044c = 0x31f0; // from KATAKANA LETTER SMALL KU1045while (c <= 0x31ff) // ..to KATAKANA LETTER SMALL RO1046charset[i++] = c++;1047// 3200..32FF; Enclosed CJK Letters and Months1048c = 0x3200; // from PARENTHESIZED HANGUL KIYEOK1049while (c <= 0x321e) // ..to PARENTHESIZED KOREAN CHARACTER O HU1050charset[i++] = c++;1051c = 0x3220; // from PARENTHESIZED IDEOGRAPH ONE1052while (c <= 0x32ff) // ..to SQUARE ERA NAME REIWA1053charset[i++] = c++;1054// 3300..33FF; CJK Compatibility1055c = 0x3300; // from SQUARE APAATO1056while (c <= 0x33ff) // ..to SQUARE GAL1057charset[i++] = c++;1058// 3400..4DBF; CJK Unified Ideographs Extension A1059c = 0x3400; // from <CJK Ideograph Extension A, First>1060while (c <= 0x4dbf) // ..to <CJK Ideograph Extension A, Last>1061charset[i++] = c++;1062// 4DC0..4DFF; Yijing Hexagram Symbols1063c = 0x4dc0; // from HEXAGRAM FOR THE CREATIVE HEAVEN1064while (c <= 0x4dff) // ..to HEXAGRAM FOR BEFORE COMPLETION1065charset[i++] = c++;1066// 4E00..9FFF; CJK Unified Ideographs1067c = 0x4e00; // from <CJK Ideograph, First>1068while (c <= 0x9ffc) // ..to <CJK Ideograph, Last>1069charset[i++] = c++;1070// A000..A48F; Yi Syllables1071c = 0xa000; // from YI SYLLABLE IT1072while (c <= 0xa48c) // ..to YI SYLLABLE YYR1073charset[i++] = c++;1074// A490..A4CF; Yi Radicals1075c = 0xa490; // from YI RADICAL QOT1076while (c <= 0xa4c6) // ..to YI RADICAL KE1077charset[i++] = c++;1078// A4D0..A4FF; Lisu1079c = 0xa4d0; // from LISU LETTER BA1080while (c <= 0xa4ff) // ..to LISU PUNCTUATION FULL STOP1081charset[i++] = c++;1082// A500..A63F; Vai1083c = 0xa500; // from VAI SYLLABLE EE1084while (c <= 0xa62b) // ..to VAI SYLLABLE NDOLE DO1085charset[i++] = c++;1086// A640..A69F; Cyrillic Extended-B1087c = 0xa640; // from CYRILLIC CAPITAL LETTER ZEMLYA1088while (c <= 0xa69f) // ..to COMBINING CYRILLIC LETTER IOTIFIED E1089charset[i++] = c++;1090// A6A0..A6FF; Bamum1091c = 0xa6a0; // from BAMUM LETTER A1092while (c <= 0xa6f7) // ..to BAMUM QUESTION MARK1093charset[i++] = c++;1094// A700..A71F; Modifier Tone Letters1095c = 0xa700; // from MODIFIER LETTER CHINESE TONE YIN PING1096while (c <= 0xa71f) // ..to MODIFIER LETTER LOW INVERTED EXCLAMATION MARK1097charset[i++] = c++;1098// A720..A7FF; Latin Extended-D1099c = 0xa720; // from MODIFIER LETTER STRESS AND HIGH TONE1100while (c <= 0xa7bf) // ..to LATIN SMALL LETTER GLOTTAL U1101charset[i++] = c++;1102c = 0xa7c2; // from LATIN CAPITAL LETTER ANGLICANA W1103while (c <= 0xa7ca) // ..to LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY1104charset[i++] = c++;1105c = 0xa7f5; // from LATIN CAPITAL LETTER REVERSED HALF H1106while (c <= 0xa7ff) // ..to LATIN EPIGRAPHIC LETTER ARCHAIC M1107charset[i++] = c++;1108// A800..A82F; Syloti Nagri1109c = 0xa800; // from SYLOTI NAGRI LETTER A1110while (c <= 0xa82c) // ..to SYLOTI NAGRI SIGN ALTERNATE HASANTA1111charset[i++] = c++;1112// A830..A83F; Common Indic Number Forms1113c = 0xa830; // from NORTH INDIC FRACTION ONE QUARTER1114while (c <= 0xa839) // ..to NORTH INDIC QUANTITY MARK1115charset[i++] = c++;1116// A840..A87F; Phags-pa1117c = 0xa840; // from PHAGS-PA LETTER KA1118while (c <= 0xa877) // ..to PHAGS-PA MARK DOUBLE SHAD1119charset[i++] = c++;1120// A880..A8DF; Saurashtra1121c = 0xa880; // from SAURASHTRA SIGN ANUSVARA1122while (c <= 0xa8c5) // ..to SAURASHTRA SIGN CANDRABINDU1123charset[i++] = c++;1124c = 0xa8ce; // from SAURASHTRA DANDA1125while (c <= 0xa8d9) // ..to SAURASHTRA DIGIT NINE1126charset[i++] = c++;1127// A8E0..A8FF; Devanagari Extended1128c = 0xa8e0; // from COMBINING DEVANAGARI DIGIT ZERO1129while (c <= 0xa8ff) // ..to DEVANAGARI VOWEL SIGN AY1130charset[i++] = c++;1131// A900..A92F; Kayah Li1132c = 0xa900; // from KAYAH LI DIGIT ZERO1133while (c <= 0xa92f) // ..to KAYAH LI SIGN SHYA1134charset[i++] = c++;1135// A930..A95F; Rejang1136c = 0xa930; // from REJANG LETTER KA1137while (c <= 0xa953) // ..to REJANG VIRAMA1138charset[i++] = c++;1139charset[i++] = 0xa95f; // REJANG SECTION MARK1140// A960..A97F; Hangul Jamo Extended-A1141c = 0xa960; // from HANGUL CHOSEONG TIKEUT-MIEUM1142while (c <= 0xa97c) // ..to HANGUL CHOSEONG SSANGYEORINHIEUH1143charset[i++] = c++;1144// A980..A9DF; Javanese1145c = 0xa980; // from JAVANESE SIGN PANYANGGA1146while (c <= 0xa9cd) // ..to JAVANESE TURNED PADA PISELEH1147charset[i++] = c++;1148c = 0xa9cf; // from JAVANESE PANGRANGKEP1149while (c <= 0xa9d9) // ..to JAVANESE DIGIT NINE1150charset[i++] = c++;1151charset[i++] = 0xa9de; // JAVANESE PADA TIRTA TUMETES1152charset[i++] = 0xa9df; // JAVANESE PADA ISEN-ISEN1153// A9E0..A9FF; Myanmar Extended-B1154c = 0xa9e0; // from MYANMAR LETTER SHAN GHA1155while (c <= 0xa9fe) // ..to MYANMAR LETTER TAI LAING BHA1156charset[i++] = c++;1157// AA00..AA5F; Cham1158c = 0xaa00; // from CHAM LETTER A1159while (c <= 0xaa36) // ..to CHAM CONSONANT SIGN WA1160charset[i++] = c++;1161c = 0xaa40; // from CHAM LETTER FINAL K1162while (c <= 0xaa4d) // ..to CHAM CONSONANT SIGN FINAL H1163charset[i++] = c++;1164c = 0xaa50; // from CHAM DIGIT ZERO1165while (c <= 0xaa59) // ..to CHAM DIGIT NINE1166charset[i++] = c++;1167c = 0xaa5c; // from CHAM PUNCTUATION SPIRAL1168while (c <= 0xaa5f) // ..to CHAM PUNCTUATION TRIPLE DANDA1169charset[i++] = c++;1170// AA60..AA7F; Myanmar Extended-A1171c = 0xaa60; // from MYANMAR LETTER KHAMTI GA1172while (c <= 0xaa7f) // ..to MYANMAR LETTER SHWE PALAUNG SHA1173charset[i++] = c++;1174// AA80..AADF; Tai Viet1175c = 0xaa80; // from TAI VIET LETTER LOW KO1176while (c <= 0xaac2) // ..to TAI VIET TONE MAI SONG1177charset[i++] = c++;1178c = 0xaadb; // from TAI VIET SYMBOL KON1179while (c <= 0xaadf) // ..to TAI VIET SYMBOL KOI KOI1180charset[i++] = c++;1181// AAE0..AAFF; Meetei Mayek Extensions1182c = 0xaae0; // from MEETEI MAYEK LETTER E1183while (c <= 0xaaf6) // ..to MEETEI MAYEK VIRAMA1184charset[i++] = c++;1185// AB00..AB2F; Ethiopic Extended-A1186c = 0xab01; // from ETHIOPIC SYLLABLE TTHU1187while (c <= 0xab06) // ..to ETHIOPIC SYLLABLE TTHO1188charset[i++] = c++;1189c = 0xab09; // from ETHIOPIC SYLLABLE DDHU1190while (c <= 0xab0e) // ..to ETHIOPIC SYLLABLE DDHO1191charset[i++] = c++;1192c = 0xab11; // from ETHIOPIC SYLLABLE DZU1193while (c <= 0xab16) // ..to ETHIOPIC SYLLABLE DZO1194charset[i++] = c++;1195c = 0xab20; // from ETHIOPIC SYLLABLE CCHHA1196while (c <= 0xab26) // ..to ETHIOPIC SYLLABLE CCHHO1197charset[i++] = c++;1198c = 0xab28; // from ETHIOPIC SYLLABLE BBA1199while (c <= 0xab2e) // ..to ETHIOPIC SYLLABLE BBO1200charset[i++] = c++;1201// AB30..AB6F; Latin Extended-E1202c = 0xab30; // from LATIN SMALL LETTER BARRED ALPHA1203while (c <= 0xab6b) // ..to MODIFIER LETTER RIGHT TACK1204charset[i++] = c++;1205// AB70..ABBF; Cherokee Supplement1206c = 0xab70; // from CHEROKEE SMALL LETTER A1207while (c <= 0xabbf) // ..to CHEROKEE SMALL LETTER YA1208charset[i++] = c++;1209// ABC0..ABFF; Meetei Mayek1210c = 0xabc0; // from MEETEI MAYEK LETTER KOK1211while (c <= 0xabed) // ..to MEETEI MAYEK APUN IYEK1212charset[i++] = c++;1213c = 0xabf0; // from MEETEI MAYEK DIGIT ZERO1214while (c <= 0xabf9) // ..to MEETEI MAYEK DIGIT NINE1215charset[i++] = c++;1216// AC00..D7AF; Hangul Syllables1217c = 0xac00; // from <Hangul Syllable, First>1218while (c <= 0xd7a3) // ..to <Hangul Syllable, Last>1219charset[i++] = c++;1220// D7B0..D7FF; Hangul Jamo Extended-B1221c = 0xd7b0; // from HANGUL JUNGSEONG O-YEO1222while (c <= 0xd7c6) // ..to HANGUL JUNGSEONG ARAEA-E1223charset[i++] = c++;1224c = 0xd7cb; // from HANGUL JONGSEONG NIEUN-RIEUL1225while (c <= 0xd7fb) // ..to HANGUL JONGSEONG PHIEUPH-THIEUTH1226charset[i++] = c++;1227// D800..DB7F; High Surrogates1228// DB80..DBFF; High Private Use Surrogates1229// DC00..DFFF; Low Surrogates1230// E000..F8FF; Private Use Area1231// F900..FAFF; CJK Compatibility Ideographs1232c = 0xf900; // from CJK COMPATIBILITY IDEOGRAPH-F9001233while (c <= 0xfa6d) // ..to CJK COMPATIBILITY IDEOGRAPH-FA6D1234charset[i++] = c++;1235c = 0xfa70; // from CJK COMPATIBILITY IDEOGRAPH-FA701236while (c <= 0xfad9) // ..to CJK COMPATIBILITY IDEOGRAPH-FAD91237charset[i++] = c++;1238// FB00..FB4F; Alphabetic Presentation Forms1239c = 0xfb00; // from LATIN SMALL LIGATURE FF1240while (c <= 0xfb06) // ..to LATIN SMALL LIGATURE ST1241charset[i++] = c++;1242c = 0xfb13; // from ARMENIAN SMALL LIGATURE MEN NOW1243while (c <= 0xfb17) // ..to ARMENIAN SMALL LIGATURE MEN XEH1244charset[i++] = c++;1245c = 0xfb1d; // from HEBREW LETTER YOD WITH HIRIQ1246while (c <= 0xfb36) // ..to HEBREW LETTER ZAYIN WITH DAGESH1247charset[i++] = c++;1248c = 0xfb38; // from HEBREW LETTER TET WITH DAGESH1249while (c <= 0xfb3c) // ..to HEBREW LETTER LAMED WITH DAGESH1250charset[i++] = c++;1251charset[i++] = 0xfb40; // HEBREW LETTER NUN WITH DAGESH1252charset[i++] = 0xfb41; // HEBREW LETTER SAMEKH WITH DAGESH1253charset[i++] = 0xfb43; // HEBREW LETTER FINAL PE WITH DAGESH1254charset[i++] = 0xfb44; // HEBREW LETTER PE WITH DAGESH1255c = 0xfb46; // from HEBREW LETTER TSADI WITH DAGESH1256while (c <= 0xfb4f) // ..to HEBREW LIGATURE ALEF LAMED1257charset[i++] = c++;1258// FB50..FDFF; Arabic Presentation Forms-A1259c = 0xfb50; // from ARABIC LETTER ALEF WASLA ISOLATED FORM1260while (c <= 0xfbc1) // ..to ARABIC SYMBOL SMALL TAH BELOW1261charset[i++] = c++;1262c = 0xfbd3; // from ARABIC LETTER NG ISOLATED FORM1263while (c <= 0xfd3f) // ..to ORNATE RIGHT PARENTHESIS1264charset[i++] = c++;1265c = 0xfd50; // from ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM1266while (c <= 0xfd8f) // ..to ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM1267charset[i++] = c++;1268c = 0xfd92; // from ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM1269while (c <= 0xfdc7) // ..to ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM1270charset[i++] = c++;1271c = 0xfdf0; // from ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM1272while (c <= 0xfdfd) // ..to ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM1273charset[i++] = c++;1274// FE00..FE0F; Variation Selectors1275c = 0xfe00; // from VARIATION SELECTOR-11276while (c <= 0xfe0f) // ..to VARIATION SELECTOR-161277charset[i++] = c++;1278// FE10..FE1F; Vertical Forms1279c = 0xfe10; // from PRESENTATION FORM FOR VERTICAL COMMA1280while (c <= 0xfe19) // ..to PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS1281charset[i++] = c++;1282// FE20..FE2F; Combining Half Marks1283c = 0xfe20; // from COMBINING LIGATURE LEFT HALF1284while (c <= 0xfe2f) // ..to COMBINING CYRILLIC TITLO RIGHT HALF1285charset[i++] = c++;1286// FE30..FE4F; CJK Compatibility Forms1287c = 0xfe30; // from PRESENTATION FORM FOR VERTICAL TWO DOT LEADER1288while (c <= 0xfe4f) // ..to WAVY LOW LINE1289charset[i++] = c++;1290// FE50..FE6F; Small Form Variants1291charset[i++] = 0xfe50; // SMALL COMMA1292charset[i++] = 0xfe52; // SMALL FULL STOP1293c = 0xfe54; // from SMALL SEMICOLON1294while (c <= 0xfe66) // ..to SMALL EQUALS SIGN1295charset[i++] = c++;1296c = 0xfe68; // from SMALL REVERSE SOLIDUS1297while (c <= 0xfe6b) // ..to SMALL COMMERCIAL AT1298charset[i++] = c++;1299// FE70..FEFF; Arabic Presentation Forms-B1300c = 0xfe70; // from ARABIC FATHATAN ISOLATED FORM1301while (c <= 0xfe74) // ..to ARABIC KASRATAN ISOLATED FORM1302charset[i++] = c++;1303c = 0xfe76; // from ARABIC FATHA ISOLATED FORM1304while (c <= 0xfefc) // ..to ARABIC LIGATURE LAM WITH ALEF FINAL FORM1305charset[i++] = c++;1306charset[i++] = 0xfeff; // ZERO WIDTH NO-BREAK SPACE1307// FF00..FFEF; Halfwidth and Fullwidth Forms1308c = 0xff01; // from FULLWIDTH EXCLAMATION MARK1309while (c <= 0xffbe) // ..to HALFWIDTH HANGUL LETTER HIEUH1310charset[i++] = c++;1311c = 0xffc2; // from HALFWIDTH HANGUL LETTER A1312while (c <= 0xffc7) // ..to HALFWIDTH HANGUL LETTER E1313charset[i++] = c++;1314c = 0xffca; // from HALFWIDTH HANGUL LETTER YEO1315while (c <= 0xffcf) // ..to HALFWIDTH HANGUL LETTER OE1316charset[i++] = c++;1317c = 0xffd2; // from HALFWIDTH HANGUL LETTER YO1318while (c <= 0xffd7) // ..to HALFWIDTH HANGUL LETTER YU1319charset[i++] = c++;1320charset[i++] = 0xffda; // HALFWIDTH HANGUL LETTER EU1321charset[i++] = 0xffdc; // HALFWIDTH HANGUL LETTER I1322c = 0xffe0; // from FULLWIDTH CENT SIGN1323while (c <= 0xffe6) // ..to FULLWIDTH WON SIGN1324charset[i++] = c++;1325c = 0xffe8; // from HALFWIDTH FORMS LIGHT VERTICAL1326while (c <= 0xffee) // ..to HALFWIDTH WHITE CIRCLE1327charset[i++] = c++;1328// FFF0..FFFF; Specials1329c = 0xfff9; // from INTERLINEAR ANNOTATION ANCHOR1330while (c <= 0xfffd) // ..to REPLACEMENT CHARACTER1331charset[i++] = c++;13321333/* Zero-terminate it, and cache the first character */1334charset[i] = 0;1335c0 = charset[0];13361337last = minlength - 1;1338i = 0;1339while (i <= last) {1340id[i] = 0;1341word[i++] = c0;1342}1343lastid = -1;1344word[i] = 0;13451346/* We must init word with dummy data, it doesn't get set until filter() */1347word = 1;1348}13491350void generate()1351{1352int i;13531354/* Handle the typical case specially */1355if (word[last] = charset[++lastid]) return;13561357lastid = 0;1358word[i = last] = c0;1359while (i--) { // Have a preceding position?1360if (word[i] = charset[++id[i]]) return;1361id[i] = 0;1362word[i] = c0;1363}13641365if (++last < maxlength) { // Next length?1366id[last] = lastid = 0;1367word[last] = c0;1368word[last + 1] = 0;1369} else // We're done1370word = 0;1371}13721373void restore()1374{1375int i, c;13761377/* Calculate the current length and infer the character indices */1378last = 0;1379while (c = word[last]) {1380i = 0; while (charset[i] != c && charset[i]) i++;1381if (!charset[i]) i = 0; // Not found1382id[last++] = i;1383}1384lastid = id[--last];1385}138613871388