CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
rapid7

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.

GitHub Repository: rapid7/metasploit-framework
Path: blob/master/data/jtr/dumb32.conf
Views: 11766
1
# This software is Copyright (c) 2012-2020 magnum, and it is hereby
2
# released to the general public under the following terms:
3
# Redistribution and use in source and binary forms, with or without
4
# modification, are permitted.
5
#
6
# Generic implementation of "dumb" exhaustive search of FULL Unicode.
7
# Default is to try *all* allocated characters in Unicode v13 (there are
8
# 143,532 of them). Even if a fast format can exhaust two characters in one
9
# hour, three characters would take about 16 years...
10
#
11
# Note that these modes will handle --max-len differently than normal: They
12
# will consider the number of characters as opposed to the number of bytes. This
13
# means you can naturally just use e.g. --max-len=3 for generating all
14
# three-character candidates (which may be up to 12 bytes each).
15
#
16
# Also note that for UTF-16 formats, the resulting plaintext size within the
17
# format will be up to four bytes (two 16-bit words) due to use of surrogates
18
# for characters above U+FFFF. This means a format which normally handles up
19
# to 27 characters may be limited to only 13 characters, worst case.
20
#
21
# Note that the (newer) cracking mode --subsets=full-unicode is way faster than
22
# this external mode, although not as easy to adapt to smaller portions of the
23
# Unicode space. See doc/SUBSETS
24
25
[List.External:Dumb32]
26
int maxlength; // Maximum password length to try, counted in characters (not bytes); init() sets it from req_maxlen, or 2 when that is zero
27
int last; // Last character position, zero-based
28
int lastid; // Character index in the last position (presumably an index into charset[] -- confirm against the code that uses it)
29
int id[0x7f]; // Current character indices for other positions (0x7f = 127 slots)
30
int charset[0x24000], c0; // charset[]: code points to try, filled in order by init() (0x24000 = 147456 slots); c0: use not visible here -- TODO confirm
31
32
void init()
33
{
34
int minlength;
35
int i, c;
36
37
# Trigger UTF-32 handling in External mode
38
utf32 = 1;
39
40
if (req_minlen)
41
minlength = req_minlen;
42
else
43
minlength = 1;
44
if (req_maxlen)
45
maxlength = req_maxlen;
46
else
47
maxlength = 2;
48
49
/*
50
* This defines the character set. This is auto-generated from UnicodeData.txt
51
* and we skip control characters.
52
*/
53
i = 0;
54
// 0000..007F; Basic Latin
55
c = 0x20; // from SPACE
56
while (c <= 0x7e) // ..to TILDE
57
charset[i++] = c++;
58
// 0080..00FF; Latin-1 Supplement
59
c = 0xa0; // from NO-BREAK SPACE
60
while (c <= 0xff) // ..to LATIN SMALL LETTER Y WITH DIAERESIS
61
charset[i++] = c++;
62
// 0100..017F; Latin Extended-A
63
c = 0x100; // from LATIN CAPITAL LETTER A WITH MACRON
64
while (c <= 0x17f) // ..to LATIN SMALL LETTER LONG S
65
charset[i++] = c++;
66
// 0180..024F; Latin Extended-B
67
c = 0x180; // from LATIN SMALL LETTER B WITH STROKE
68
while (c <= 0x24f) // ..to LATIN SMALL LETTER Y WITH STROKE
69
charset[i++] = c++;
70
// 0250..02AF; IPA Extensions
71
c = 0x250; // from LATIN SMALL LETTER TURNED A
72
while (c <= 0x2af) // ..to LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
73
charset[i++] = c++;
74
// 02B0..02FF; Spacing Modifier Letters
75
c = 0x2b0; // from MODIFIER LETTER SMALL H
76
while (c <= 0x2ff) // ..to MODIFIER LETTER LOW LEFT ARROW
77
charset[i++] = c++;
78
// 0300..036F; Combining Diacritical Marks
79
c = 0x300; // from COMBINING GRAVE ACCENT
80
while (c <= 0x36f) // ..to COMBINING LATIN SMALL LETTER X
81
charset[i++] = c++;
82
// 0370..03FF; Greek and Coptic
83
c = 0x370; // from GREEK CAPITAL LETTER HETA
84
while (c <= 0x377) // ..to GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
85
charset[i++] = c++;
86
c = 0x37a; // from GREEK YPOGEGRAMMENI
87
while (c <= 0x37f) // ..to GREEK CAPITAL LETTER YOT
88
charset[i++] = c++;
89
c = 0x384; // from GREEK TONOS
90
while (c <= 0x38a) // ..to GREEK CAPITAL LETTER IOTA WITH TONOS
91
charset[i++] = c++;
92
c = 0x38e; // from GREEK CAPITAL LETTER UPSILON WITH TONOS
93
while (c <= 0x3a1) // ..to GREEK CAPITAL LETTER RHO
94
charset[i++] = c++;
95
c = 0x3a3; // from GREEK CAPITAL LETTER SIGMA
96
while (c <= 0x3ff) // ..to GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
97
charset[i++] = c++;
98
// 0400..04FF; Cyrillic
99
c = 0x400; // from CYRILLIC CAPITAL LETTER IE WITH GRAVE
100
while (c <= 0x4ff) // ..to CYRILLIC SMALL LETTER HA WITH STROKE
101
charset[i++] = c++;
102
// 0500..052F; Cyrillic Supplement
103
c = 0x500; // from CYRILLIC CAPITAL LETTER KOMI DE
104
while (c <= 0x52f) // ..to CYRILLIC SMALL LETTER EL WITH DESCENDER
105
charset[i++] = c++;
106
// 0530..058F; Armenian
107
c = 0x531; // from ARMENIAN CAPITAL LETTER AYB
108
while (c <= 0x556) // ..to ARMENIAN CAPITAL LETTER FEH
109
charset[i++] = c++;
110
c = 0x559; // from ARMENIAN MODIFIER LETTER LEFT HALF RING
111
while (c <= 0x58a) // ..to ARMENIAN HYPHEN
112
charset[i++] = c++;
113
charset[i++] = 0x58d; // RIGHT-FACING ARMENIAN ETERNITY SIGN
114
charset[i++] = 0x58f; // ARMENIAN DRAM SIGN
115
// 0590..05FF; Hebrew
116
c = 0x591; // from HEBREW ACCENT ETNAHTA
117
while (c <= 0x5c7) // ..to HEBREW POINT QAMATS QATAN
118
charset[i++] = c++;
119
c = 0x5d0; // from HEBREW LETTER ALEF
120
while (c <= 0x5ea) // ..to HEBREW LETTER TAV
121
charset[i++] = c++;
122
c = 0x5ef; // from HEBREW YOD TRIANGLE
123
while (c <= 0x5f4) // ..to HEBREW PUNCTUATION GERSHAYIM
124
charset[i++] = c++;
125
// 0600..06FF; Arabic
126
c = 0x600; // from ARABIC NUMBER SIGN
127
while (c <= 0x61c) // ..to ARABIC LETTER MARK
128
charset[i++] = c++;
129
c = 0x61e; // from ARABIC TRIPLE DOT PUNCTUATION MARK
130
while (c <= 0x6ff) // ..to ARABIC LETTER HEH WITH INVERTED V
131
charset[i++] = c++;
132
// 0700..074F; Syriac
133
c = 0x700; // from SYRIAC END OF PARAGRAPH
134
while (c <= 0x70d) // ..to SYRIAC HARKLEAN ASTERISCUS
135
charset[i++] = c++;
136
c = 0x70f; // from SYRIAC ABBREVIATION MARK
137
while (c <= 0x74a) // ..to SYRIAC BARREKH
138
charset[i++] = c++;
139
charset[i++] = 0x74d; // SYRIAC LETTER SOGDIAN ZHAIN
140
charset[i++] = 0x74f; // SYRIAC LETTER SOGDIAN FE
141
// 0750..077F; Arabic Supplement
142
c = 0x750; // from ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW
143
while (c <= 0x77f) // ..to ARABIC LETTER KAF WITH TWO DOTS ABOVE
144
charset[i++] = c++;
145
// 0780..07BF; Thaana
146
c = 0x780; // from THAANA LETTER HAA
147
while (c <= 0x7b1) // ..to THAANA LETTER NAA
148
charset[i++] = c++;
149
// 07C0..07FF; NKo
150
c = 0x7c0; // from NKO DIGIT ZERO
151
while (c <= 0x7fa) // ..to NKO LAJANYALAN
152
charset[i++] = c++;
153
charset[i++] = 0x7fd; // NKO DANTAYALAN
154
charset[i++] = 0x7ff; // NKO TAMAN SIGN
155
// 0800..083F; Samaritan
156
c = 0x800; // from SAMARITAN LETTER ALAF
157
while (c <= 0x82d) // ..to SAMARITAN MARK NEQUDAA
158
charset[i++] = c++;
159
c = 0x830; // from SAMARITAN PUNCTUATION NEQUDAA
160
while (c <= 0x83e) // ..to SAMARITAN PUNCTUATION ANNAAU
161
charset[i++] = c++;
162
// 0840..085F; Mandaic
163
c = 0x840; // from MANDAIC LETTER HALQA
164
while (c <= 0x85b) // ..to MANDAIC GEMINATION MARK
165
charset[i++] = c++;
166
charset[i++] = 0x85e; // MANDAIC PUNCTUATION
167
// 0860..086F; Syriac Supplement
168
c = 0x860; // from SYRIAC LETTER MALAYALAM NGA
169
while (c <= 0x86a) // ..to SYRIAC LETTER MALAYALAM SSA
170
charset[i++] = c++;
171
// 08A0..08FF; Arabic Extended-A
172
c = 0x8a0; // from ARABIC LETTER BEH WITH SMALL V BELOW
173
while (c <= 0x8b4) // ..to ARABIC LETTER KAF WITH DOT BELOW
174
charset[i++] = c++;
175
c = 0x8b6; // from ARABIC LETTER BEH WITH SMALL MEEM ABOVE
176
while (c <= 0x8c7) // ..to ARABIC LETTER LAM WITH SMALL ARABIC LETTER TAH ABOVE
177
charset[i++] = c++;
178
c = 0x8d3; // from ARABIC SMALL LOW WAW
179
while (c <= 0x8ff) // ..to ARABIC MARK SIDEWAYS NOON GHUNNA
180
charset[i++] = c++;
181
// 0900..097F; Devanagari
182
c = 0x900; // from DEVANAGARI SIGN INVERTED CANDRABINDU
183
while (c <= 0x97f) // ..to DEVANAGARI LETTER BBA
184
charset[i++] = c++;
185
// 0980..09FF; Bengali
186
c = 0x980; // from BENGALI ANJI
187
while (c <= 0x983) // ..to BENGALI SIGN VISARGA
188
charset[i++] = c++;
189
c = 0x985; // from BENGALI LETTER A
190
while (c <= 0x98c) // ..to BENGALI LETTER VOCALIC L
191
charset[i++] = c++;
192
charset[i++] = 0x98f; // BENGALI LETTER E
193
charset[i++] = 0x990; // BENGALI LETTER AI
194
c = 0x993; // from BENGALI LETTER O
195
while (c <= 0x9a8) // ..to BENGALI LETTER NA
196
charset[i++] = c++;
197
c = 0x9aa; // from BENGALI LETTER PA
198
while (c <= 0x9b0) // ..to BENGALI LETTER RA
199
charset[i++] = c++;
200
c = 0x9b6; // from BENGALI LETTER SHA
201
while (c <= 0x9b9) // ..to BENGALI LETTER HA
202
charset[i++] = c++;
203
c = 0x9bc; // from BENGALI SIGN NUKTA
204
while (c <= 0x9c4) // ..to BENGALI VOWEL SIGN VOCALIC RR
205
charset[i++] = c++;
206
charset[i++] = 0x9c7; // BENGALI VOWEL SIGN E
207
charset[i++] = 0x9c8; // BENGALI VOWEL SIGN AI
208
c = 0x9cb; // from BENGALI VOWEL SIGN O
209
while (c <= 0x9ce) // ..to BENGALI LETTER KHANDA TA
210
charset[i++] = c++;
211
charset[i++] = 0x9dc; // BENGALI LETTER RRA
212
charset[i++] = 0x9dd; // BENGALI LETTER RHA
213
c = 0x9df; // from BENGALI LETTER YYA
214
while (c <= 0x9e3) // ..to BENGALI VOWEL SIGN VOCALIC LL
215
charset[i++] = c++;
216
c = 0x9e6; // from BENGALI DIGIT ZERO
217
while (c <= 0x9fe) // ..to BENGALI SANDHI MARK
218
charset[i++] = c++;
219
// 0A00..0A7F; Gurmukhi
220
charset[i++] = 0xa01; // GURMUKHI SIGN ADAK BINDI
221
charset[i++] = 0xa03; // GURMUKHI SIGN VISARGA
222
c = 0xa05; // from GURMUKHI LETTER A
223
while (c <= 0xa0a) // ..to GURMUKHI LETTER UU
224
charset[i++] = c++;
225
charset[i++] = 0xa0f; // GURMUKHI LETTER EE
226
charset[i++] = 0xa10; // GURMUKHI LETTER AI
227
c = 0xa13; // from GURMUKHI LETTER OO
228
while (c <= 0xa28) // ..to GURMUKHI LETTER NA
229
charset[i++] = c++;
230
c = 0xa2a; // from GURMUKHI LETTER PA
231
while (c <= 0xa30) // ..to GURMUKHI LETTER RA
232
charset[i++] = c++;
233
charset[i++] = 0xa32; // GURMUKHI LETTER LA
234
charset[i++] = 0xa33; // GURMUKHI LETTER LLA
235
charset[i++] = 0xa35; // GURMUKHI LETTER VA
236
charset[i++] = 0xa36; // GURMUKHI LETTER SHA
237
charset[i++] = 0xa38; // GURMUKHI LETTER SA
238
charset[i++] = 0xa39; // GURMUKHI LETTER HA
239
c = 0xa3e; // from GURMUKHI VOWEL SIGN AA
240
while (c <= 0xa42) // ..to GURMUKHI VOWEL SIGN UU
241
charset[i++] = c++;
242
charset[i++] = 0xa47; // GURMUKHI VOWEL SIGN EE
243
charset[i++] = 0xa48; // GURMUKHI VOWEL SIGN AI
244
charset[i++] = 0xa4b; // GURMUKHI VOWEL SIGN OO
245
charset[i++] = 0xa4d; // GURMUKHI SIGN VIRAMA
246
c = 0xa59; // from GURMUKHI LETTER KHHA
247
while (c <= 0xa5c) // ..to GURMUKHI LETTER RRA
248
charset[i++] = c++;
249
c = 0xa66; // from GURMUKHI DIGIT ZERO
250
while (c <= 0xa76) // ..to GURMUKHI ABBREVIATION SIGN
251
charset[i++] = c++;
252
// 0A80..0AFF; Gujarati
253
charset[i++] = 0xa81; // GUJARATI SIGN CANDRABINDU
254
charset[i++] = 0xa83; // GUJARATI SIGN VISARGA
255
c = 0xa85; // from GUJARATI LETTER A
256
while (c <= 0xa8d) // ..to GUJARATI VOWEL CANDRA E
257
charset[i++] = c++;
258
charset[i++] = 0xa8f; // GUJARATI LETTER E
259
charset[i++] = 0xa91; // GUJARATI VOWEL CANDRA O
260
c = 0xa93; // from GUJARATI LETTER O
261
while (c <= 0xaa8) // ..to GUJARATI LETTER NA
262
charset[i++] = c++;
263
c = 0xaaa; // from GUJARATI LETTER PA
264
while (c <= 0xab0) // ..to GUJARATI LETTER RA
265
charset[i++] = c++;
266
charset[i++] = 0xab2; // GUJARATI LETTER LA
267
charset[i++] = 0xab3; // GUJARATI LETTER LLA
268
c = 0xab5; // from GUJARATI LETTER VA
269
while (c <= 0xab9) // ..to GUJARATI LETTER HA
270
charset[i++] = c++;
271
c = 0xabc; // from GUJARATI SIGN NUKTA
272
while (c <= 0xac5) // ..to GUJARATI VOWEL SIGN CANDRA E
273
charset[i++] = c++;
274
charset[i++] = 0xac7; // GUJARATI VOWEL SIGN E
275
charset[i++] = 0xac9; // GUJARATI VOWEL SIGN CANDRA O
276
charset[i++] = 0xacb; // GUJARATI VOWEL SIGN O
277
charset[i++] = 0xacd; // GUJARATI SIGN VIRAMA
278
c = 0xae0; // from GUJARATI LETTER VOCALIC RR
279
while (c <= 0xae3) // ..to GUJARATI VOWEL SIGN VOCALIC LL
280
charset[i++] = c++;
281
c = 0xae6; // from GUJARATI DIGIT ZERO
282
while (c <= 0xaf1) // ..to GUJARATI RUPEE SIGN
283
charset[i++] = c++;
284
c = 0xaf9; // from GUJARATI LETTER ZHA
285
while (c <= 0xaff) // ..to GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
286
charset[i++] = c++;
287
// 0B00..0B7F; Oriya
288
charset[i++] = 0xb01; // ORIYA SIGN CANDRABINDU
289
charset[i++] = 0xb03; // ORIYA SIGN VISARGA
290
c = 0xb05; // from ORIYA LETTER A
291
while (c <= 0xb0c) // ..to ORIYA LETTER VOCALIC L
292
charset[i++] = c++;
293
charset[i++] = 0xb0f; // ORIYA LETTER E
294
charset[i++] = 0xb10; // ORIYA LETTER AI
295
c = 0xb13; // from ORIYA LETTER O
296
while (c <= 0xb28) // ..to ORIYA LETTER NA
297
charset[i++] = c++;
298
c = 0xb2a; // from ORIYA LETTER PA
299
while (c <= 0xb30) // ..to ORIYA LETTER RA
300
charset[i++] = c++;
301
charset[i++] = 0xb32; // ORIYA LETTER LA
302
charset[i++] = 0xb33; // ORIYA LETTER LLA
303
c = 0xb35; // from ORIYA LETTER VA
304
while (c <= 0xb39) // ..to ORIYA LETTER HA
305
charset[i++] = c++;
306
c = 0xb3c; // from ORIYA SIGN NUKTA
307
while (c <= 0xb44) // ..to ORIYA VOWEL SIGN VOCALIC RR
308
charset[i++] = c++;
309
charset[i++] = 0xb47; // ORIYA VOWEL SIGN E
310
charset[i++] = 0xb48; // ORIYA VOWEL SIGN AI
311
charset[i++] = 0xb4b; // ORIYA VOWEL SIGN O
312
charset[i++] = 0xb4d; // ORIYA SIGN VIRAMA
313
charset[i++] = 0xb55; // ORIYA SIGN OVERLINE
314
charset[i++] = 0xb57; // ORIYA AU LENGTH MARK
315
charset[i++] = 0xb5c; // ORIYA LETTER RRA
316
charset[i++] = 0xb5d; // ORIYA LETTER RHA
317
c = 0xb5f; // from ORIYA LETTER YYA
318
while (c <= 0xb63) // ..to ORIYA VOWEL SIGN VOCALIC LL
319
charset[i++] = c++;
320
c = 0xb66; // from ORIYA DIGIT ZERO
321
while (c <= 0xb77) // ..to ORIYA FRACTION THREE SIXTEENTHS
322
charset[i++] = c++;
323
// 0B80..0BFF; Tamil
324
charset[i++] = 0xb82; // TAMIL SIGN ANUSVARA
325
charset[i++] = 0xb83; // TAMIL SIGN VISARGA
326
c = 0xb85; // from TAMIL LETTER A
327
while (c <= 0xb8a) // ..to TAMIL LETTER UU
328
charset[i++] = c++;
329
charset[i++] = 0xb8e; // TAMIL LETTER E
330
charset[i++] = 0xb90; // TAMIL LETTER AI
331
c = 0xb92; // from TAMIL LETTER O
332
while (c <= 0xb95) // ..to TAMIL LETTER KA
333
charset[i++] = c++;
334
charset[i++] = 0xb99; // TAMIL LETTER NGA
335
charset[i++] = 0xb9a; // TAMIL LETTER CA
336
charset[i++] = 0xb9e; // TAMIL LETTER NYA
337
charset[i++] = 0xb9f; // TAMIL LETTER TTA
338
charset[i++] = 0xba3; // TAMIL LETTER NNA
339
charset[i++] = 0xba4; // TAMIL LETTER TA
340
charset[i++] = 0xba8; // TAMIL LETTER NA
341
charset[i++] = 0xbaa; // TAMIL LETTER PA
342
c = 0xbae; // from TAMIL LETTER MA
343
while (c <= 0xbb9) // ..to TAMIL LETTER HA
344
charset[i++] = c++;
345
c = 0xbbe; // from TAMIL VOWEL SIGN AA
346
while (c <= 0xbc2) // ..to TAMIL VOWEL SIGN UU
347
charset[i++] = c++;
348
charset[i++] = 0xbc6; // TAMIL VOWEL SIGN E
349
charset[i++] = 0xbc8; // TAMIL VOWEL SIGN AI
350
c = 0xbca; // from TAMIL VOWEL SIGN O
351
while (c <= 0xbcd) // ..to TAMIL SIGN VIRAMA
352
charset[i++] = c++;
353
c = 0xbe6; // from TAMIL DIGIT ZERO
354
while (c <= 0xbfa) // ..to TAMIL NUMBER SIGN
355
charset[i++] = c++;
356
// 0C00..0C7F; Telugu
357
c = 0xc00; // from TELUGU SIGN COMBINING CANDRABINDU ABOVE
358
while (c <= 0xc0c) // ..to TELUGU LETTER VOCALIC L
359
charset[i++] = c++;
360
charset[i++] = 0xc0e; // TELUGU LETTER E
361
charset[i++] = 0xc10; // TELUGU LETTER AI
362
c = 0xc12; // from TELUGU LETTER O
363
while (c <= 0xc28) // ..to TELUGU LETTER NA
364
charset[i++] = c++;
365
c = 0xc2a; // from TELUGU LETTER PA
366
while (c <= 0xc39) // ..to TELUGU LETTER HA
367
charset[i++] = c++;
368
c = 0xc3d; // from TELUGU SIGN AVAGRAHA
369
while (c <= 0xc44) // ..to TELUGU VOWEL SIGN VOCALIC RR
370
charset[i++] = c++;
371
charset[i++] = 0xc46; // TELUGU VOWEL SIGN E
372
charset[i++] = 0xc48; // TELUGU VOWEL SIGN AI
373
c = 0xc4a; // from TELUGU VOWEL SIGN O
374
while (c <= 0xc4d) // ..to TELUGU SIGN VIRAMA
375
charset[i++] = c++;
376
charset[i++] = 0xc55; // TELUGU LENGTH MARK
377
charset[i++] = 0xc56; // TELUGU AI LENGTH MARK
378
charset[i++] = 0xc58; // TELUGU LETTER TSA
379
charset[i++] = 0xc5a; // TELUGU LETTER RRRA
380
c = 0xc60; // from TELUGU LETTER VOCALIC RR
381
while (c <= 0xc63) // ..to TELUGU VOWEL SIGN VOCALIC LL
382
charset[i++] = c++;
383
c = 0xc66; // from TELUGU DIGIT ZERO
384
while (c <= 0xc6f) // ..to TELUGU DIGIT NINE
385
charset[i++] = c++;
386
c = 0xc77; // from TELUGU SIGN SIDDHAM
387
while (c <= 0xc7f) // ..to TELUGU SIGN TUUMU
388
charset[i++] = c++;
389
// 0C80..0CFF; Kannada
390
c = 0xc80; // from KANNADA SIGN SPACING CANDRABINDU
391
while (c <= 0xc8c) // ..to KANNADA LETTER VOCALIC L
392
charset[i++] = c++;
393
charset[i++] = 0xc8e; // KANNADA LETTER E
394
charset[i++] = 0xc90; // KANNADA LETTER AI
395
c = 0xc92; // from KANNADA LETTER O
396
while (c <= 0xca8) // ..to KANNADA LETTER NA
397
charset[i++] = c++;
398
c = 0xcaa; // from KANNADA LETTER PA
399
while (c <= 0xcb3) // ..to KANNADA LETTER LLA
400
charset[i++] = c++;
401
c = 0xcb5; // from KANNADA LETTER VA
402
while (c <= 0xcb9) // ..to KANNADA LETTER HA
403
charset[i++] = c++;
404
c = 0xcbc; // from KANNADA SIGN NUKTA
405
while (c <= 0xcc4) // ..to KANNADA VOWEL SIGN VOCALIC RR
406
charset[i++] = c++;
407
charset[i++] = 0xcc6; // KANNADA VOWEL SIGN E
408
charset[i++] = 0xcc8; // KANNADA VOWEL SIGN AI
409
c = 0xcca; // from KANNADA VOWEL SIGN O
410
while (c <= 0xccd) // ..to KANNADA SIGN VIRAMA
411
charset[i++] = c++;
412
charset[i++] = 0xcd5; // KANNADA LENGTH MARK
413
charset[i++] = 0xcd6; // KANNADA AI LENGTH MARK
414
c = 0xce0; // from KANNADA LETTER VOCALIC RR
415
while (c <= 0xce3) // ..to KANNADA VOWEL SIGN VOCALIC LL
416
charset[i++] = c++;
417
c = 0xce6; // from KANNADA DIGIT ZERO
418
while (c <= 0xcef) // ..to KANNADA DIGIT NINE
419
charset[i++] = c++;
420
charset[i++] = 0xcf1; // KANNADA SIGN JIHVAMULIYA
421
charset[i++] = 0xcf2; // KANNADA SIGN UPADHMANIYA
422
// 0D00..0D7F; Malayalam
423
c = 0xd00; // from MALAYALAM SIGN COMBINING ANUSVARA ABOVE
424
while (c <= 0xd0c) // ..to MALAYALAM LETTER VOCALIC L
425
charset[i++] = c++;
426
charset[i++] = 0xd0e; // MALAYALAM LETTER E
427
charset[i++] = 0xd10; // MALAYALAM LETTER AI
428
c = 0xd12; // from MALAYALAM LETTER O
429
while (c <= 0xd44) // ..to MALAYALAM VOWEL SIGN VOCALIC RR
430
charset[i++] = c++;
431
charset[i++] = 0xd46; // MALAYALAM VOWEL SIGN E
432
charset[i++] = 0xd48; // MALAYALAM VOWEL SIGN AI
433
c = 0xd4a; // from MALAYALAM VOWEL SIGN O
434
while (c <= 0xd4f) // ..to MALAYALAM SIGN PARA
435
charset[i++] = c++;
436
c = 0xd54; // from MALAYALAM LETTER CHILLU M
437
while (c <= 0xd63) // ..to MALAYALAM VOWEL SIGN VOCALIC LL
438
charset[i++] = c++;
439
c = 0xd66; // from MALAYALAM DIGIT ZERO
440
while (c <= 0xd7f) // ..to MALAYALAM LETTER CHILLU K
441
charset[i++] = c++;
442
// 0D80..0DFF; Sinhala
443
charset[i++] = 0xd81; // SINHALA SIGN CANDRABINDU
444
charset[i++] = 0xd83; // SINHALA SIGN VISARGAYA
445
c = 0xd85; // from SINHALA LETTER AYANNA
446
while (c <= 0xd96) // ..to SINHALA LETTER AUYANNA
447
charset[i++] = c++;
448
c = 0xd9a; // from SINHALA LETTER ALPAPRAANA KAYANNA
449
while (c <= 0xdb1) // ..to SINHALA LETTER DANTAJA NAYANNA
450
charset[i++] = c++;
451
c = 0xdb3; // from SINHALA LETTER SANYAKA DAYANNA
452
while (c <= 0xdbb) // ..to SINHALA LETTER RAYANNA
453
charset[i++] = c++;
454
c = 0xdc0; // from SINHALA LETTER VAYANNA
455
while (c <= 0xdc6) // ..to SINHALA LETTER FAYANNA
456
charset[i++] = c++;
457
c = 0xdcf; // from SINHALA VOWEL SIGN AELA-PILLA
458
while (c <= 0xdd4) // ..to SINHALA VOWEL SIGN KETTI PAA-PILLA
459
charset[i++] = c++;
460
c = 0xdd8; // from SINHALA VOWEL SIGN GAETTA-PILLA
461
while (c <= 0xddf) // ..to SINHALA VOWEL SIGN GAYANUKITTA
462
charset[i++] = c++;
463
c = 0xde6; // from SINHALA LITH DIGIT ZERO
464
while (c <= 0xdef) // ..to SINHALA LITH DIGIT NINE
465
charset[i++] = c++;
466
charset[i++] = 0xdf2; // SINHALA VOWEL SIGN DIGA GAETTA-PILLA
467
charset[i++] = 0xdf4; // SINHALA PUNCTUATION KUNDDALIYA
468
// 0E00..0E7F; Thai
469
c = 0xe01; // from THAI CHARACTER KO KAI
470
while (c <= 0xe3a) // ..to THAI CHARACTER PHINTHU
471
charset[i++] = c++;
472
c = 0xe3f; // from THAI CURRENCY SYMBOL BAHT
473
while (c <= 0xe5b) // ..to THAI CHARACTER KHOMUT
474
charset[i++] = c++;
475
// 0E80..0EFF; Lao
476
charset[i++] = 0xe81; // LAO LETTER KO
477
charset[i++] = 0xe82; // LAO LETTER KHO SUNG
478
c = 0xe86; // from LAO LETTER PALI GHA
479
while (c <= 0xe8a) // ..to LAO LETTER SO TAM
480
charset[i++] = c++;
481
c = 0xe8c; // from LAO LETTER PALI JHA
482
while (c <= 0xea3) // ..to LAO LETTER LO LING
483
charset[i++] = c++;
484
c = 0xea7; // from LAO LETTER WO
485
while (c <= 0xebd) // ..to LAO SEMIVOWEL SIGN NYO
486
charset[i++] = c++;
487
c = 0xec0; // from LAO VOWEL SIGN E
488
while (c <= 0xec4) // ..to LAO VOWEL SIGN AI
489
charset[i++] = c++;
490
c = 0xec8; // from LAO TONE MAI EK
491
while (c <= 0xecd) // ..to LAO NIGGAHITA
492
charset[i++] = c++;
493
c = 0xed0; // from LAO DIGIT ZERO
494
while (c <= 0xed9) // ..to LAO DIGIT NINE
495
charset[i++] = c++;
496
c = 0xedc; // from LAO HO NO
497
while (c <= 0xedf) // ..to LAO LETTER KHMU NYO
498
charset[i++] = c++;
499
// 0F00..0FFF; Tibetan
500
c = 0xf00; // from TIBETAN SYLLABLE OM
501
while (c <= 0xf47) // ..to TIBETAN LETTER JA
502
charset[i++] = c++;
503
c = 0xf49; // from TIBETAN LETTER NYA
504
while (c <= 0xf6c) // ..to TIBETAN LETTER RRA
505
charset[i++] = c++;
506
c = 0xf71; // from TIBETAN VOWEL SIGN AA
507
while (c <= 0xf97) // ..to TIBETAN SUBJOINED LETTER JA
508
charset[i++] = c++;
509
c = 0xf99; // from TIBETAN SUBJOINED LETTER NYA
510
while (c <= 0xfbc) // ..to TIBETAN SUBJOINED LETTER FIXED-FORM RA
511
charset[i++] = c++;
512
c = 0xfbe; // from TIBETAN KU RU KHA
513
while (c <= 0xfcc) // ..to TIBETAN SYMBOL NOR BU BZHI -KHYIL
514
charset[i++] = c++;
515
c = 0xfce; // from TIBETAN SIGN RDEL NAG RDEL DKAR
516
while (c <= 0xfda) // ..to TIBETAN MARK TRAILING MCHAN RTAGS
517
charset[i++] = c++;
518
// 1000..109F; Myanmar
519
c = 0x1000; // from MYANMAR LETTER KA
520
while (c <= 0x109f) // ..to MYANMAR SYMBOL SHAN EXCLAMATION
521
charset[i++] = c++;
522
// 10A0..10FF; Georgian
523
c = 0x10a0; // from GEORGIAN CAPITAL LETTER AN
524
while (c <= 0x10c5) // ..to GEORGIAN CAPITAL LETTER HOE
525
charset[i++] = c++;
526
c = 0x10d0; // from GEORGIAN LETTER AN
527
while (c <= 0x10ff) // ..to GEORGIAN LETTER LABIAL SIGN
528
charset[i++] = c++;
529
// 1100..11FF; Hangul Jamo
530
c = 0x1100; // from HANGUL CHOSEONG KIYEOK
531
while (c <= 0x11ff) // ..to HANGUL JONGSEONG SSANGNIEUN
532
charset[i++] = c++;
533
// 1200..137F; Ethiopic
534
c = 0x1200; // from ETHIOPIC SYLLABLE HA
535
while (c <= 0x1248) // ..to ETHIOPIC SYLLABLE QWA
536
charset[i++] = c++;
537
c = 0x124a; // from ETHIOPIC SYLLABLE QWI
538
while (c <= 0x124d) // ..to ETHIOPIC SYLLABLE QWE
539
charset[i++] = c++;
540
c = 0x1250; // from ETHIOPIC SYLLABLE QHA
541
while (c <= 0x1256) // ..to ETHIOPIC SYLLABLE QHO
542
charset[i++] = c++;
543
c = 0x125a; // from ETHIOPIC SYLLABLE QHWI
544
while (c <= 0x125d) // ..to ETHIOPIC SYLLABLE QHWE
545
charset[i++] = c++;
546
c = 0x1260; // from ETHIOPIC SYLLABLE BA
547
while (c <= 0x1288) // ..to ETHIOPIC SYLLABLE XWA
548
charset[i++] = c++;
549
c = 0x128a; // from ETHIOPIC SYLLABLE XWI
550
while (c <= 0x128d) // ..to ETHIOPIC SYLLABLE XWE
551
charset[i++] = c++;
552
c = 0x1290; // from ETHIOPIC SYLLABLE NA
553
while (c <= 0x12b0) // ..to ETHIOPIC SYLLABLE KWA
554
charset[i++] = c++;
555
c = 0x12b2; // from ETHIOPIC SYLLABLE KWI
556
while (c <= 0x12b5) // ..to ETHIOPIC SYLLABLE KWE
557
charset[i++] = c++;
558
c = 0x12b8; // from ETHIOPIC SYLLABLE KXA
559
while (c <= 0x12be) // ..to ETHIOPIC SYLLABLE KXO
560
charset[i++] = c++;
561
c = 0x12c2; // from ETHIOPIC SYLLABLE KXWI
562
while (c <= 0x12c5) // ..to ETHIOPIC SYLLABLE KXWE
563
charset[i++] = c++;
564
c = 0x12c8; // from ETHIOPIC SYLLABLE WA
565
while (c <= 0x12d6) // ..to ETHIOPIC SYLLABLE PHARYNGEAL O
566
charset[i++] = c++;
567
c = 0x12d8; // from ETHIOPIC SYLLABLE ZA
568
while (c <= 0x1310) // ..to ETHIOPIC SYLLABLE GWA
569
charset[i++] = c++;
570
c = 0x1312; // from ETHIOPIC SYLLABLE GWI
571
while (c <= 0x1315) // ..to ETHIOPIC SYLLABLE GWE
572
charset[i++] = c++;
573
c = 0x1318; // from ETHIOPIC SYLLABLE GGA
574
while (c <= 0x135a) // ..to ETHIOPIC SYLLABLE FYA
575
charset[i++] = c++;
576
c = 0x135d; // from ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK
577
while (c <= 0x137c) // ..to ETHIOPIC NUMBER TEN THOUSAND
578
charset[i++] = c++;
579
// 1380..139F; Ethiopic Supplement
580
c = 0x1380; // from ETHIOPIC SYLLABLE SEBATBEIT MWA
581
while (c <= 0x1399) // ..to ETHIOPIC TONAL MARK KURT
582
charset[i++] = c++;
583
// 13A0..13FF; Cherokee
584
c = 0x13a0; // from CHEROKEE LETTER A
585
while (c <= 0x13f5) // ..to CHEROKEE LETTER MV
586
charset[i++] = c++;
587
c = 0x13f8; // from CHEROKEE SMALL LETTER YE
588
while (c <= 0x13fd) // ..to CHEROKEE SMALL LETTER MV
589
charset[i++] = c++;
590
// 1400..167F; Unified Canadian Aboriginal Syllabics
591
c = 0x1400; // from CANADIAN SYLLABICS HYPHEN
592
while (c <= 0x167f) // ..to CANADIAN SYLLABICS BLACKFOOT W
593
charset[i++] = c++;
594
// 1680..169F; Ogham
595
c = 0x1680; // from OGHAM SPACE MARK
596
while (c <= 0x169c) // ..to OGHAM REVERSED FEATHER MARK
597
charset[i++] = c++;
598
// 16A0..16FF; Runic
599
c = 0x16a0; // from RUNIC LETTER FEHU FEOH FE F
600
while (c <= 0x16f8) // ..to RUNIC LETTER FRANKS CASKET AESC
601
charset[i++] = c++;
602
// 1700..171F; Tagalog
603
c = 0x1700; // from TAGALOG LETTER A
604
while (c <= 0x170c) // ..to TAGALOG LETTER YA
605
charset[i++] = c++;
606
c = 0x170e; // from TAGALOG LETTER LA
607
while (c <= 0x1714) // ..to TAGALOG SIGN VIRAMA
608
charset[i++] = c++;
609
// 1720..173F; Hanunoo
610
c = 0x1720; // from HANUNOO LETTER A
611
while (c <= 0x1736) // ..to PHILIPPINE DOUBLE PUNCTUATION
612
charset[i++] = c++;
613
// 1740..175F; Buhid
614
c = 0x1740; // from BUHID LETTER A
615
while (c <= 0x1753) // ..to BUHID VOWEL SIGN U
616
charset[i++] = c++;
617
// 1760..177F; Tagbanwa
618
c = 0x1760; // from TAGBANWA LETTER A
619
while (c <= 0x176c) // ..to TAGBANWA LETTER YA
620
charset[i++] = c++;
621
charset[i++] = 0x176e; // TAGBANWA LETTER LA
622
charset[i++] = 0x1770; // TAGBANWA LETTER SA
623
charset[i++] = 0x1772; // TAGBANWA VOWEL SIGN I
624
charset[i++] = 0x1773; // TAGBANWA VOWEL SIGN U
625
// 1780..17FF; Khmer
626
c = 0x1780; // from KHMER LETTER KA
627
while (c <= 0x17dd) // ..to KHMER SIGN ATTHACAN
628
charset[i++] = c++;
629
c = 0x17e0; // from KHMER DIGIT ZERO
630
while (c <= 0x17e9) // ..to KHMER DIGIT NINE
631
charset[i++] = c++;
632
c = 0x17f0; // from KHMER SYMBOL LEK ATTAK SON
633
while (c <= 0x17f9) // ..to KHMER SYMBOL LEK ATTAK PRAM-BUON
634
charset[i++] = c++;
635
// 1800..18AF; Mongolian
636
c = 0x1800; // from MONGOLIAN BIRGA
637
while (c <= 0x180e) // ..to MONGOLIAN VOWEL SEPARATOR
638
charset[i++] = c++;
639
c = 0x1810; // from MONGOLIAN DIGIT ZERO
640
while (c <= 0x1819) // ..to MONGOLIAN DIGIT NINE
641
charset[i++] = c++;
642
c = 0x1820; // from MONGOLIAN LETTER A
643
while (c <= 0x1878) // ..to MONGOLIAN LETTER CHA WITH TWO DOTS
644
charset[i++] = c++;
645
c = 0x1880; // from MONGOLIAN LETTER ALI GALI ANUSVARA ONE
646
while (c <= 0x18aa) // ..to MONGOLIAN LETTER MANCHU ALI GALI LHA
647
charset[i++] = c++;
648
// 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
649
c = 0x18b0; // from CANADIAN SYLLABICS OY
650
while (c <= 0x18f5) // ..to CANADIAN SYLLABICS CARRIER DENTAL S
651
charset[i++] = c++;
652
// 1900..194F; Limbu
653
c = 0x1900; // from LIMBU VOWEL-CARRIER LETTER
654
while (c <= 0x191e) // ..to LIMBU LETTER TRA
655
charset[i++] = c++;
656
c = 0x1920; // from LIMBU VOWEL SIGN A
657
while (c <= 0x192b) // ..to LIMBU SUBJOINED LETTER WA
658
charset[i++] = c++;
659
c = 0x1930; // from LIMBU SMALL LETTER KA
660
while (c <= 0x193b) // ..to LIMBU SIGN SA-I
661
charset[i++] = c++;
662
c = 0x1944; // from LIMBU EXCLAMATION MARK
663
while (c <= 0x194f) // ..to LIMBU DIGIT NINE
664
charset[i++] = c++;
665
// 1950..197F; Tai Le
666
c = 0x1950; // from TAI LE LETTER KA
667
while (c <= 0x196d) // ..to TAI LE LETTER AI
668
charset[i++] = c++;
669
c = 0x1970; // from TAI LE LETTER TONE-2
670
while (c <= 0x1974) // ..to TAI LE LETTER TONE-6
671
charset[i++] = c++;
672
// 1980..19DF; New Tai Lue
673
c = 0x1980; // from NEW TAI LUE LETTER HIGH QA
674
while (c <= 0x19ab) // ..to NEW TAI LUE LETTER LOW SUA
675
charset[i++] = c++;
676
c = 0x19b0; // from NEW TAI LUE VOWEL SIGN VOWEL SHORTENER
677
while (c <= 0x19c9) // ..to NEW TAI LUE TONE MARK-2
678
charset[i++] = c++;
679
c = 0x19d0; // from NEW TAI LUE DIGIT ZERO
680
while (c <= 0x19da) // ..to NEW TAI LUE THAM DIGIT ONE
681
charset[i++] = c++;
682
charset[i++] = 0x19de; // NEW TAI LUE SIGN LAE
683
charset[i++] = 0x19df; // NEW TAI LUE SIGN LAEV
684
// 19E0..19FF; Khmer Symbols
685
c = 0x19e0; // from KHMER SYMBOL PATHAMASAT
686
while (c <= 0x19ff) // ..to KHMER SYMBOL DAP-PRAM ROC
687
charset[i++] = c++;
688
// 1A00..1A1F; Buginese
689
c = 0x1a00; // from BUGINESE LETTER KA
690
while (c <= 0x1a1b) // ..to BUGINESE VOWEL SIGN AE
691
charset[i++] = c++;
692
charset[i++] = 0x1a1e; // BUGINESE PALLAWA
693
charset[i++] = 0x1a1f; // BUGINESE END OF SECTION
694
// 1A20..1AAF; Tai Tham
695
c = 0x1a20; // from TAI THAM LETTER HIGH KA
696
while (c <= 0x1a5e) // ..to TAI THAM CONSONANT SIGN SA
697
charset[i++] = c++;
698
c = 0x1a60; // from TAI THAM SIGN SAKOT
699
while (c <= 0x1a7c) // ..to TAI THAM SIGN KHUEN-LUE KARAN
700
charset[i++] = c++;
701
c = 0x1a7f; // from TAI THAM COMBINING CRYPTOGRAMMIC DOT
702
while (c <= 0x1a89) // ..to TAI THAM HORA DIGIT NINE
703
charset[i++] = c++;
704
c = 0x1a90; // from TAI THAM THAM DIGIT ZERO
705
while (c <= 0x1a99) // ..to TAI THAM THAM DIGIT NINE
706
charset[i++] = c++;
707
c = 0x1aa0; // from TAI THAM SIGN WIANG
708
while (c <= 0x1aad) // ..to TAI THAM SIGN CAANG
709
charset[i++] = c++;
710
// 1AB0..1AFF; Combining Diacritical Marks Extended
711
c = 0x1ab0; // from COMBINING DOUBLED CIRCUMFLEX ACCENT
712
while (c <= 0x1ac0) // ..to COMBINING LATIN SMALL LETTER TURNED W BELOW
713
charset[i++] = c++;
714
// 1B00..1B7F; Balinese
715
c = 0x1b00; // from BALINESE SIGN ULU RICEM
716
while (c <= 0x1b4b) // ..to BALINESE LETTER ASYURA SASAK
717
charset[i++] = c++;
718
c = 0x1b50; // from BALINESE DIGIT ZERO
719
while (c <= 0x1b7c) // ..to BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING
720
charset[i++] = c++;
721
// 1B80..1BBF; Sundanese
722
c = 0x1b80; // from SUNDANESE SIGN PANYECEK
723
while (c <= 0x1bbf) // ..to SUNDANESE LETTER FINAL M
724
charset[i++] = c++;
725
// 1BC0..1BFF; Batak
726
c = 0x1bc0; // from BATAK LETTER A
727
while (c <= 0x1bf3) // ..to BATAK PANONGONAN
728
charset[i++] = c++;
729
c = 0x1bfc; // from BATAK SYMBOL BINDU NA METEK
730
while (c <= 0x1bff) // ..to BATAK SYMBOL BINDU PANGOLAT
731
charset[i++] = c++;
732
// 1C00..1C4F; Lepcha
733
c = 0x1c00; // from LEPCHA LETTER KA
734
while (c <= 0x1c37) // ..to LEPCHA SIGN NUKTA
735
charset[i++] = c++;
736
c = 0x1c3b; // from LEPCHA PUNCTUATION TA-ROL
737
while (c <= 0x1c49) // ..to LEPCHA DIGIT NINE
738
charset[i++] = c++;
739
charset[i++] = 0x1c4d; // LEPCHA LETTER TTA
740
charset[i++] = 0x1c4f; // LEPCHA LETTER DDA
741
// 1C50..1C7F; Ol Chiki
742
c = 0x1c50; // from OL CHIKI DIGIT ZERO
743
while (c <= 0x1c7f) // ..to OL CHIKI PUNCTUATION DOUBLE MUCAAD
744
charset[i++] = c++;
745
// 1C80..1C8F; Cyrillic Extended-C
746
c = 0x1c80; // from CYRILLIC SMALL LETTER ROUNDED VE
747
while (c <= 0x1c88) // ..to CYRILLIC SMALL LETTER UNBLENDED UK
748
charset[i++] = c++;
749
// 1C90..1CBF; Georgian Extended
750
c = 0x1c90; // from GEORGIAN MTAVRULI CAPITAL LETTER AN
751
while (c <= 0x1cba) // ..to GEORGIAN MTAVRULI CAPITAL LETTER AIN
752
charset[i++] = c++;
753
charset[i++] = 0x1cbd; // GEORGIAN MTAVRULI CAPITAL LETTER AEN
754
charset[i++] = 0x1cbf; // GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
755
// 1CC0..1CCF; Sundanese Supplement
756
c = 0x1cc0; // from SUNDANESE PUNCTUATION BINDU SURYA
757
while (c <= 0x1cc7) // ..to SUNDANESE PUNCTUATION BINDU BA SATANGA
758
charset[i++] = c++;
759
// 1CD0..1CFF; Vedic Extensions
760
c = 0x1cd0; // from VEDIC TONE KARSHANA
761
while (c <= 0x1cfa) // ..to VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA
762
charset[i++] = c++;
763
// 1D00..1D7F; Phonetic Extensions
764
c = 0x1d00; // from LATIN LETTER SMALL CAPITAL A
765
while (c <= 0x1d7f) // ..to LATIN SMALL LETTER UPSILON WITH STROKE
766
charset[i++] = c++;
767
// 1D80..1DBF; Phonetic Extensions Supplement
768
c = 0x1d80; // from LATIN SMALL LETTER B WITH PALATAL HOOK
769
while (c <= 0x1dbf) // ..to MODIFIER LETTER SMALL THETA
770
charset[i++] = c++;
771
// 1DC0..1DFF; Combining Diacritical Marks Supplement
772
c = 0x1dc0; // from COMBINING DOTTED GRAVE ACCENT
773
while (c <= 0x1df9) // ..to COMBINING WIDE INVERTED BRIDGE BELOW
774
charset[i++] = c++;
775
c = 0x1dfb; // from COMBINING DELETION MARK
776
while (c <= 0x1dff) // ..to COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
777
charset[i++] = c++;
778
// 1E00..1EFF; Latin Extended Additional
779
c = 0x1e00; // from LATIN CAPITAL LETTER A WITH RING BELOW
780
while (c <= 0x1eff) // ..to LATIN SMALL LETTER Y WITH LOOP
781
charset[i++] = c++;
782
// 1F00..1FFF; Greek Extended
783
c = 0x1f00; // from GREEK SMALL LETTER ALPHA WITH PSILI
784
while (c <= 0x1f15) // ..to GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
785
charset[i++] = c++;
786
c = 0x1f18; // from GREEK CAPITAL LETTER EPSILON WITH PSILI
787
while (c <= 0x1f1d) // ..to GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
788
charset[i++] = c++;
789
c = 0x1f20; // from GREEK SMALL LETTER ETA WITH PSILI
790
while (c <= 0x1f45) // ..to GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
791
charset[i++] = c++;
792
c = 0x1f48; // from GREEK CAPITAL LETTER OMICRON WITH PSILI
793
while (c <= 0x1f4d) // ..to GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
794
charset[i++] = c++;
795
c = 0x1f50; // from GREEK SMALL LETTER UPSILON WITH PSILI
796
while (c <= 0x1f57) // ..to GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI
797
charset[i++] = c++;
798
c = 0x1f5f; // from GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
799
while (c <= 0x1f7d) // ..to GREEK SMALL LETTER OMEGA WITH OXIA
800
charset[i++] = c++;
801
c = 0x1f80; // from GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
802
while (c <= 0x1fb4) // ..to GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
803
charset[i++] = c++;
804
c = 0x1fb6; // from GREEK SMALL LETTER ALPHA WITH PERISPOMENI
805
while (c <= 0x1fc4) // ..to GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
806
charset[i++] = c++;
807
c = 0x1fc6; // from GREEK SMALL LETTER ETA WITH PERISPOMENI
808
while (c <= 0x1fd3) // ..to GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
809
charset[i++] = c++;
810
c = 0x1fd6; // from GREEK SMALL LETTER IOTA WITH PERISPOMENI
811
while (c <= 0x1fdb) // ..to GREEK CAPITAL LETTER IOTA WITH OXIA
812
charset[i++] = c++;
813
c = 0x1fdd; // from GREEK DASIA AND VARIA
814
while (c <= 0x1fef) // ..to GREEK VARIA
815
charset[i++] = c++;
816
// U+1FF2..U+1FF4 are three consecutive assigned code points; each one is
// listed so the middle code point is not skipped.
charset[i++] = 0x1ff2; // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
charset[i++] = 0x1ff3; // GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
charset[i++] = 0x1ff4; // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
818
c = 0x1ff6; // from GREEK SMALL LETTER OMEGA WITH PERISPOMENI
819
while (c <= 0x1ffe) // ..to GREEK DASIA
820
charset[i++] = c++;
821
// 2000..206F; General Punctuation
822
c = 0x2000; // from EN QUAD
823
while (c <= 0x2064) // ..to INVISIBLE PLUS
824
charset[i++] = c++;
825
c = 0x2066; // from LEFT-TO-RIGHT ISOLATE
826
while (c <= 0x206f) // ..to NOMINAL DIGIT SHAPES
827
charset[i++] = c++;
828
// 2070..209F; Superscripts and Subscripts
829
charset[i++] = 0x2070; // SUPERSCRIPT ZERO
830
charset[i++] = 0x2071; // SUPERSCRIPT LATIN SMALL LETTER I
831
c = 0x2074; // from SUPERSCRIPT FOUR
832
while (c <= 0x208e) // ..to SUBSCRIPT RIGHT PARENTHESIS
833
charset[i++] = c++;
834
c = 0x2090; // from LATIN SUBSCRIPT SMALL LETTER A
835
while (c <= 0x209c) // ..to LATIN SUBSCRIPT SMALL LETTER T
836
charset[i++] = c++;
837
// 20A0..20CF; Currency Symbols
838
c = 0x20a0; // from EURO-CURRENCY SIGN
839
while (c <= 0x20bf) // ..to BITCOIN SIGN
840
charset[i++] = c++;
841
// 20D0..20FF; Combining Diacritical Marks for Symbols
842
c = 0x20d0; // from COMBINING LEFT HARPOON ABOVE
843
while (c <= 0x20f0) // ..to COMBINING ASTERISK ABOVE
844
charset[i++] = c++;
845
// 2100..214F; Letterlike Symbols
846
c = 0x2100; // from ACCOUNT OF
847
while (c <= 0x214f) // ..to SYMBOL FOR SAMARITAN SOURCE
848
charset[i++] = c++;
849
// 2150..218F; Number Forms
850
c = 0x2150; // from VULGAR FRACTION ONE SEVENTH
851
while (c <= 0x218b) // ..to TURNED DIGIT THREE
852
charset[i++] = c++;
853
// 2190..21FF; Arrows
854
c = 0x2190; // from LEFTWARDS ARROW
855
while (c <= 0x21ff) // ..to LEFT RIGHT OPEN-HEADED ARROW
856
charset[i++] = c++;
857
// 2200..22FF; Mathematical Operators
858
c = 0x2200; // from FOR ALL
859
while (c <= 0x22ff) // ..to Z NOTATION BAG MEMBERSHIP
860
charset[i++] = c++;
861
// 2300..23FF; Miscellaneous Technical
862
c = 0x2300; // from DIAMETER SIGN
863
while (c <= 0x23ff) // ..to OBSERVER EYE SYMBOL
864
charset[i++] = c++;
865
// 2400..243F; Control Pictures
866
c = 0x2400; // from SYMBOL FOR NULL
867
while (c <= 0x2426) // ..to SYMBOL FOR SUBSTITUTE FORM TWO
868
charset[i++] = c++;
869
// 2440..245F; Optical Character Recognition
870
c = 0x2440; // from OCR HOOK
871
while (c <= 0x244a) // ..to OCR DOUBLE BACKSLASH
872
charset[i++] = c++;
873
// 2460..24FF; Enclosed Alphanumerics
874
c = 0x2460; // from CIRCLED DIGIT ONE
875
while (c <= 0x24ff) // ..to NEGATIVE CIRCLED DIGIT ZERO
876
charset[i++] = c++;
877
// 2500..257F; Box Drawing
878
c = 0x2500; // from BOX DRAWINGS LIGHT HORIZONTAL
879
while (c <= 0x257f) // ..to BOX DRAWINGS HEAVY UP AND LIGHT DOWN
880
charset[i++] = c++;
881
// 2580..259F; Block Elements
882
c = 0x2580; // from UPPER HALF BLOCK
883
while (c <= 0x259f) // ..to QUADRANT UPPER RIGHT AND LOWER LEFT AND LOWER RIGHT
884
charset[i++] = c++;
885
// 25A0..25FF; Geometric Shapes
886
c = 0x25a0; // from BLACK SQUARE
887
while (c <= 0x25ff) // ..to LOWER RIGHT TRIANGLE
888
charset[i++] = c++;
889
// 2600..26FF; Miscellaneous Symbols
890
c = 0x2600; // from BLACK SUN WITH RAYS
891
while (c <= 0x26ff) // ..to WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
892
charset[i++] = c++;
893
// 2700..27BF; Dingbats
894
c = 0x2700; // from BLACK SAFETY SCISSORS
895
while (c <= 0x27bf) // ..to DOUBLE CURLY LOOP
896
charset[i++] = c++;
897
// 27C0..27EF; Miscellaneous Mathematical Symbols-A
898
c = 0x27c0; // from THREE DIMENSIONAL ANGLE
899
while (c <= 0x27ef) // ..to MATHEMATICAL RIGHT FLATTENED PARENTHESIS
900
charset[i++] = c++;
901
// 27F0..27FF; Supplemental Arrows-A
902
c = 0x27f0; // from UPWARDS QUADRUPLE ARROW
903
while (c <= 0x27ff) // ..to LONG RIGHTWARDS SQUIGGLE ARROW
904
charset[i++] = c++;
905
// 2800..28FF; Braille Patterns
906
c = 0x2800; // from BRAILLE PATTERN BLANK
907
while (c <= 0x28ff) // ..to BRAILLE PATTERN DOTS-12345678
908
charset[i++] = c++;
909
// 2900..297F; Supplemental Arrows-B
910
c = 0x2900; // from RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE
911
while (c <= 0x297f) // ..to DOWN FISH TAIL
912
charset[i++] = c++;
913
// 2980..29FF; Miscellaneous Mathematical Symbols-B
914
c = 0x2980; // from TRIPLE VERTICAL BAR DELIMITER
915
while (c <= 0x29ff) // ..to MINY
916
charset[i++] = c++;
917
// 2A00..2AFF; Supplemental Mathematical Operators
918
c = 0x2a00; // from N-ARY CIRCLED DOT OPERATOR
919
while (c <= 0x2aff) // ..to N-ARY WHITE VERTICAL BAR
920
charset[i++] = c++;
921
// 2B00..2BFF; Miscellaneous Symbols and Arrows
922
c = 0x2b00; // from NORTH EAST WHITE ARROW
923
while (c <= 0x2b73) // ..to DOWNWARDS TRIANGLE-HEADED ARROW TO BAR
924
charset[i++] = c++;
925
c = 0x2b76; // from NORTH WEST TRIANGLE-HEADED ARROW TO BAR
926
while (c <= 0x2b95) // ..to RIGHTWARDS BLACK ARROW
927
charset[i++] = c++;
928
c = 0x2b97; // from SYMBOL FOR TYPE A ELECTRONICS
929
while (c <= 0x2bff) // ..to HELLSCHREIBER PAUSE SYMBOL
930
charset[i++] = c++;
931
// 2C00..2C5F; Glagolitic
932
c = 0x2c00; // from GLAGOLITIC CAPITAL LETTER AZU
933
while (c <= 0x2c2e) // ..to GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
934
charset[i++] = c++;
935
c = 0x2c30; // from GLAGOLITIC SMALL LETTER AZU
936
while (c <= 0x2c5e) // ..to GLAGOLITIC SMALL LETTER LATINATE MYSLITE
937
charset[i++] = c++;
938
// 2C60..2C7F; Latin Extended-C
939
c = 0x2c60; // from LATIN CAPITAL LETTER L WITH DOUBLE BAR
940
while (c <= 0x2c7f) // ..to LATIN CAPITAL LETTER Z WITH SWASH TAIL
941
charset[i++] = c++;
942
// 2C80..2CFF; Coptic
943
c = 0x2c80; // from COPTIC CAPITAL LETTER ALFA
944
while (c <= 0x2cf3) // ..to COPTIC SMALL LETTER BOHAIRIC KHEI
945
charset[i++] = c++;
946
c = 0x2cf9; // from COPTIC OLD NUBIAN FULL STOP
947
while (c <= 0x2cff) // ..to COPTIC MORPHOLOGICAL DIVIDER
948
charset[i++] = c++;
949
// 2D00..2D2F; Georgian Supplement
950
c = 0x2d00; // from GEORGIAN SMALL LETTER AN
951
while (c <= 0x2d25) // ..to GEORGIAN SMALL LETTER HOE
952
charset[i++] = c++;
953
c = 0x2d27; // from GEORGIAN SMALL LETTER YN
954
while (c <= 0x2d2d) // ..to GEORGIAN SMALL LETTER AEN
955
charset[i++] = c++;
956
// 2D30..2D7F; Tifinagh
957
c = 0x2d30; // from TIFINAGH LETTER YA
958
while (c <= 0x2d67) // ..to TIFINAGH LETTER YO
959
charset[i++] = c++;
960
charset[i++] = 0x2d6f; // TIFINAGH MODIFIER LETTER LABIALIZATION MARK
961
charset[i++] = 0x2d70; // TIFINAGH SEPARATOR MARK
962
charset[i++] = 0x2d7f; // TIFINAGH CONSONANT JOINER
963
// 2D80..2DDF; Ethiopic Extended
964
c = 0x2d80; // from ETHIOPIC SYLLABLE LOA
965
while (c <= 0x2d96) // ..to ETHIOPIC SYLLABLE GGWE
966
charset[i++] = c++;
967
c = 0x2da0; // from ETHIOPIC SYLLABLE SSA
968
while (c <= 0x2da6) // ..to ETHIOPIC SYLLABLE SSO
969
charset[i++] = c++;
970
c = 0x2da8; // from ETHIOPIC SYLLABLE CCA
971
while (c <= 0x2dae) // ..to ETHIOPIC SYLLABLE CCO
972
charset[i++] = c++;
973
c = 0x2db0; // from ETHIOPIC SYLLABLE ZZA
974
while (c <= 0x2db6) // ..to ETHIOPIC SYLLABLE ZZO
975
charset[i++] = c++;
976
c = 0x2db8; // from ETHIOPIC SYLLABLE CCHA
977
while (c <= 0x2dbe) // ..to ETHIOPIC SYLLABLE CCHO
978
charset[i++] = c++;
979
c = 0x2dc0; // from ETHIOPIC SYLLABLE QYA
980
while (c <= 0x2dc6) // ..to ETHIOPIC SYLLABLE QYO
981
charset[i++] = c++;
982
c = 0x2dc8; // from ETHIOPIC SYLLABLE KYA
983
while (c <= 0x2dce) // ..to ETHIOPIC SYLLABLE KYO
984
charset[i++] = c++;
985
c = 0x2dd0; // from ETHIOPIC SYLLABLE XYA
986
while (c <= 0x2dd6) // ..to ETHIOPIC SYLLABLE XYO
987
charset[i++] = c++;
988
c = 0x2dd8; // from ETHIOPIC SYLLABLE GYA
989
while (c <= 0x2dde) // ..to ETHIOPIC SYLLABLE GYO
990
charset[i++] = c++;
991
// 2DE0..2DFF; Cyrillic Extended-A
992
c = 0x2de0; // from COMBINING CYRILLIC LETTER BE
993
while (c <= 0x2dff) // ..to COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
994
charset[i++] = c++;
995
// 2E00..2E7F; Supplemental Punctuation
996
c = 0x2e00; // from RIGHT ANGLE SUBSTITUTION MARKER
997
while (c <= 0x2e52) // ..to TIRONIAN SIGN CAPITAL ET
998
charset[i++] = c++;
999
// 2E80..2EFF; CJK Radicals Supplement
1000
c = 0x2e80; // from CJK RADICAL REPEAT
1001
while (c <= 0x2e99) // ..to CJK RADICAL RAP
1002
charset[i++] = c++;
1003
c = 0x2e9b; // from CJK RADICAL CHOKE
1004
while (c <= 0x2ef3) // ..to CJK RADICAL C-SIMPLIFIED TURTLE
1005
charset[i++] = c++;
1006
// 2F00..2FDF; Kangxi Radicals
1007
c = 0x2f00; // from KANGXI RADICAL ONE
1008
while (c <= 0x2fd5) // ..to KANGXI RADICAL FLUTE
1009
charset[i++] = c++;
1010
// 2FF0..2FFF; Ideographic Description Characters
1011
c = 0x2ff0; // from IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT
1012
while (c <= 0x2ffb) // ..to IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
1013
charset[i++] = c++;
1014
// 3000..303F; CJK Symbols and Punctuation
1015
c = 0x3000; // from IDEOGRAPHIC SPACE
1016
while (c <= 0x303f) // ..to IDEOGRAPHIC HALF FILL SPACE
1017
charset[i++] = c++;
1018
// 3040..309F; Hiragana
1019
c = 0x3041; // from HIRAGANA LETTER SMALL A
1020
while (c <= 0x3096) // ..to HIRAGANA LETTER SMALL KE
1021
charset[i++] = c++;
1022
c = 0x3099; // from COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
1023
while (c <= 0x309f) // ..to HIRAGANA DIGRAPH YORI
1024
charset[i++] = c++;
1025
// 30A0..30FF; Katakana
1026
c = 0x30a0; // from KATAKANA-HIRAGANA DOUBLE HYPHEN
1027
while (c <= 0x30ff) // ..to KATAKANA DIGRAPH KOTO
1028
charset[i++] = c++;
1029
// 3100..312F; Bopomofo
1030
c = 0x3105; // from BOPOMOFO LETTER B
1031
while (c <= 0x312f) // ..to BOPOMOFO LETTER NN
1032
charset[i++] = c++;
1033
// 3130..318F; Hangul Compatibility Jamo
1034
c = 0x3131; // from HANGUL LETTER KIYEOK
1035
while (c <= 0x318e) // ..to HANGUL LETTER ARAEAE
1036
charset[i++] = c++;
1037
// 3190..319F; Kanbun
1038
c = 0x3190; // from IDEOGRAPHIC ANNOTATION LINKING MARK
1039
while (c <= 0x319f) // ..to IDEOGRAPHIC ANNOTATION MAN MARK
1040
charset[i++] = c++;
1041
// 31A0..31BF; Bopomofo Extended
1042
c = 0x31a0; // from BOPOMOFO LETTER BU
1043
while (c <= 0x31bf) // ..to BOPOMOFO LETTER AH
1044
charset[i++] = c++;
1045
// 31C0..31EF; CJK Strokes
1046
c = 0x31c0; // from CJK STROKE T
1047
while (c <= 0x31e3) // ..to CJK STROKE Q
1048
charset[i++] = c++;
1049
// 31F0..31FF; Katakana Phonetic Extensions
1050
c = 0x31f0; // from KATAKANA LETTER SMALL KU
1051
while (c <= 0x31ff) // ..to KATAKANA LETTER SMALL RO
1052
charset[i++] = c++;
1053
// 3200..32FF; Enclosed CJK Letters and Months
1054
c = 0x3200; // from PARENTHESIZED HANGUL KIYEOK
1055
while (c <= 0x321e) // ..to PARENTHESIZED KOREAN CHARACTER O HU
1056
charset[i++] = c++;
1057
c = 0x3220; // from PARENTHESIZED IDEOGRAPH ONE
1058
while (c <= 0x32ff) // ..to SQUARE ERA NAME REIWA
1059
charset[i++] = c++;
1060
// 3300..33FF; CJK Compatibility
1061
c = 0x3300; // from SQUARE APAATO
1062
while (c <= 0x33ff) // ..to SQUARE GAL
1063
charset[i++] = c++;
1064
// 3400..4DBF; CJK Unified Ideographs Extension A
1065
c = 0x3400; // from <CJK Ideograph Extension A, First>
1066
while (c <= 0x4dbf) // ..to <CJK Ideograph Extension A, Last>
1067
charset[i++] = c++;
1068
// 4DC0..4DFF; Yijing Hexagram Symbols
1069
c = 0x4dc0; // from HEXAGRAM FOR THE CREATIVE HEAVEN
1070
while (c <= 0x4dff) // ..to HEXAGRAM FOR BEFORE COMPLETION
1071
charset[i++] = c++;
1072
// 4E00..9FFF; CJK Unified Ideographs
1073
c = 0x4e00; // from <CJK Ideograph, First>
1074
while (c <= 0x9ffc) // ..to <CJK Ideograph, Last>
1075
charset[i++] = c++;
1076
// A000..A48F; Yi Syllables
1077
c = 0xa000; // from YI SYLLABLE IT
1078
while (c <= 0xa48c) // ..to YI SYLLABLE YYR
1079
charset[i++] = c++;
1080
// A490..A4CF; Yi Radicals
1081
c = 0xa490; // from YI RADICAL QOT
1082
while (c <= 0xa4c6) // ..to YI RADICAL KE
1083
charset[i++] = c++;
1084
// A4D0..A4FF; Lisu
1085
c = 0xa4d0; // from LISU LETTER BA
1086
while (c <= 0xa4ff) // ..to LISU PUNCTUATION FULL STOP
1087
charset[i++] = c++;
1088
// A500..A63F; Vai
1089
c = 0xa500; // from VAI SYLLABLE EE
1090
while (c <= 0xa62b) // ..to VAI SYLLABLE NDOLE DO
1091
charset[i++] = c++;
1092
// A640..A69F; Cyrillic Extended-B
1093
c = 0xa640; // from CYRILLIC CAPITAL LETTER ZEMLYA
1094
while (c <= 0xa69f) // ..to COMBINING CYRILLIC LETTER IOTIFIED E
1095
charset[i++] = c++;
1096
// A6A0..A6FF; Bamum
1097
c = 0xa6a0; // from BAMUM LETTER A
1098
while (c <= 0xa6f7) // ..to BAMUM QUESTION MARK
1099
charset[i++] = c++;
1100
// A700..A71F; Modifier Tone Letters
1101
c = 0xa700; // from MODIFIER LETTER CHINESE TONE YIN PING
1102
while (c <= 0xa71f) // ..to MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
1103
charset[i++] = c++;
1104
// A720..A7FF; Latin Extended-D
1105
c = 0xa720; // from MODIFIER LETTER STRESS AND HIGH TONE
1106
while (c <= 0xa7bf) // ..to LATIN SMALL LETTER GLOTTAL U
1107
charset[i++] = c++;
1108
c = 0xa7c2; // from LATIN CAPITAL LETTER ANGLICANA W
1109
while (c <= 0xa7ca) // ..to LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
1110
charset[i++] = c++;
1111
c = 0xa7f5; // from LATIN CAPITAL LETTER REVERSED HALF H
1112
while (c <= 0xa7ff) // ..to LATIN EPIGRAPHIC LETTER ARCHAIC M
1113
charset[i++] = c++;
1114
// A800..A82F; Syloti Nagri
1115
c = 0xa800; // from SYLOTI NAGRI LETTER A
1116
while (c <= 0xa82c) // ..to SYLOTI NAGRI SIGN ALTERNATE HASANTA
1117
charset[i++] = c++;
1118
// A830..A83F; Common Indic Number Forms
1119
c = 0xa830; // from NORTH INDIC FRACTION ONE QUARTER
1120
while (c <= 0xa839) // ..to NORTH INDIC QUANTITY MARK
1121
charset[i++] = c++;
1122
// A840..A87F; Phags-pa
1123
c = 0xa840; // from PHAGS-PA LETTER KA
1124
while (c <= 0xa877) // ..to PHAGS-PA MARK DOUBLE SHAD
1125
charset[i++] = c++;
1126
// A880..A8DF; Saurashtra
1127
c = 0xa880; // from SAURASHTRA SIGN ANUSVARA
1128
while (c <= 0xa8c5) // ..to SAURASHTRA SIGN CANDRABINDU
1129
charset[i++] = c++;
1130
c = 0xa8ce; // from SAURASHTRA DANDA
1131
while (c <= 0xa8d9) // ..to SAURASHTRA DIGIT NINE
1132
charset[i++] = c++;
1133
// A8E0..A8FF; Devanagari Extended
1134
c = 0xa8e0; // from COMBINING DEVANAGARI DIGIT ZERO
1135
while (c <= 0xa8ff) // ..to DEVANAGARI VOWEL SIGN AY
1136
charset[i++] = c++;
1137
// A900..A92F; Kayah Li
1138
c = 0xa900; // from KAYAH LI DIGIT ZERO
1139
while (c <= 0xa92f) // ..to KAYAH LI SIGN SHYA
1140
charset[i++] = c++;
1141
// A930..A95F; Rejang
1142
c = 0xa930; // from REJANG LETTER KA
1143
while (c <= 0xa953) // ..to REJANG VIRAMA
1144
charset[i++] = c++;
1145
charset[i++] = 0xa95f; // REJANG SECTION MARK
1146
// A960..A97F; Hangul Jamo Extended-A
1147
c = 0xa960; // from HANGUL CHOSEONG TIKEUT-MIEUM
1148
while (c <= 0xa97c) // ..to HANGUL CHOSEONG SSANGYEORINHIEUH
1149
charset[i++] = c++;
1150
// A980..A9DF; Javanese
1151
c = 0xa980; // from JAVANESE SIGN PANYANGGA
1152
while (c <= 0xa9cd) // ..to JAVANESE TURNED PADA PISELEH
1153
charset[i++] = c++;
1154
c = 0xa9cf; // from JAVANESE PANGRANGKEP
1155
while (c <= 0xa9d9) // ..to JAVANESE DIGIT NINE
1156
charset[i++] = c++;
1157
charset[i++] = 0xa9de; // JAVANESE PADA TIRTA TUMETES
1158
charset[i++] = 0xa9df; // JAVANESE PADA ISEN-ISEN
1159
// A9E0..A9FF; Myanmar Extended-B
1160
c = 0xa9e0; // from MYANMAR LETTER SHAN GHA
1161
while (c <= 0xa9fe) // ..to MYANMAR LETTER TAI LAING BHA
1162
charset[i++] = c++;
1163
// AA00..AA5F; Cham
1164
c = 0xaa00; // from CHAM LETTER A
1165
while (c <= 0xaa36) // ..to CHAM CONSONANT SIGN WA
1166
charset[i++] = c++;
1167
c = 0xaa40; // from CHAM LETTER FINAL K
1168
while (c <= 0xaa4d) // ..to CHAM CONSONANT SIGN FINAL H
1169
charset[i++] = c++;
1170
c = 0xaa50; // from CHAM DIGIT ZERO
1171
while (c <= 0xaa59) // ..to CHAM DIGIT NINE
1172
charset[i++] = c++;
1173
c = 0xaa5c; // from CHAM PUNCTUATION SPIRAL
1174
while (c <= 0xaa5f) // ..to CHAM PUNCTUATION TRIPLE DANDA
1175
charset[i++] = c++;
1176
// AA60..AA7F; Myanmar Extended-A
1177
c = 0xaa60; // from MYANMAR LETTER KHAMTI GA
1178
while (c <= 0xaa7f) // ..to MYANMAR LETTER SHWE PALAUNG SHA
1179
charset[i++] = c++;
1180
// AA80..AADF; Tai Viet
1181
c = 0xaa80; // from TAI VIET LETTER LOW KO
1182
while (c <= 0xaac2) // ..to TAI VIET TONE MAI SONG
1183
charset[i++] = c++;
1184
c = 0xaadb; // from TAI VIET SYMBOL KON
1185
while (c <= 0xaadf) // ..to TAI VIET SYMBOL KOI KOI
1186
charset[i++] = c++;
1187
// AAE0..AAFF; Meetei Mayek Extensions
1188
c = 0xaae0; // from MEETEI MAYEK LETTER E
1189
while (c <= 0xaaf6) // ..to MEETEI MAYEK VIRAMA
1190
charset[i++] = c++;
1191
// AB00..AB2F; Ethiopic Extended-A
1192
c = 0xab01; // from ETHIOPIC SYLLABLE TTHU
1193
while (c <= 0xab06) // ..to ETHIOPIC SYLLABLE TTHO
1194
charset[i++] = c++;
1195
c = 0xab09; // from ETHIOPIC SYLLABLE DDHU
1196
while (c <= 0xab0e) // ..to ETHIOPIC SYLLABLE DDHO
1197
charset[i++] = c++;
1198
c = 0xab11; // from ETHIOPIC SYLLABLE DZU
1199
while (c <= 0xab16) // ..to ETHIOPIC SYLLABLE DZO
1200
charset[i++] = c++;
1201
c = 0xab20; // from ETHIOPIC SYLLABLE CCHHA
1202
while (c <= 0xab26) // ..to ETHIOPIC SYLLABLE CCHHO
1203
charset[i++] = c++;
1204
c = 0xab28; // from ETHIOPIC SYLLABLE BBA
1205
while (c <= 0xab2e) // ..to ETHIOPIC SYLLABLE BBO
1206
charset[i++] = c++;
1207
// AB30..AB6F; Latin Extended-E
1208
c = 0xab30; // from LATIN SMALL LETTER BARRED ALPHA
1209
while (c <= 0xab6b) // ..to MODIFIER LETTER RIGHT TACK
1210
charset[i++] = c++;
1211
// AB70..ABBF; Cherokee Supplement
1212
c = 0xab70; // from CHEROKEE SMALL LETTER A
1213
while (c <= 0xabbf) // ..to CHEROKEE SMALL LETTER YA
1214
charset[i++] = c++;
1215
// ABC0..ABFF; Meetei Mayek
1216
c = 0xabc0; // from MEETEI MAYEK LETTER KOK
1217
while (c <= 0xabed) // ..to MEETEI MAYEK APUN IYEK
1218
charset[i++] = c++;
1219
c = 0xabf0; // from MEETEI MAYEK DIGIT ZERO
1220
while (c <= 0xabf9) // ..to MEETEI MAYEK DIGIT NINE
1221
charset[i++] = c++;
1222
// AC00..D7AF; Hangul Syllables
1223
c = 0xac00; // from <Hangul Syllable, First>
1224
while (c <= 0xd7a3) // ..to <Hangul Syllable, Last>
1225
charset[i++] = c++;
1226
// D7B0..D7FF; Hangul Jamo Extended-B
1227
c = 0xd7b0; // from HANGUL JUNGSEONG O-YEO
1228
while (c <= 0xd7c6) // ..to HANGUL JUNGSEONG ARAEA-E
1229
charset[i++] = c++;
1230
c = 0xd7cb; // from HANGUL JONGSEONG NIEUN-RIEUL
1231
while (c <= 0xd7fb) // ..to HANGUL JONGSEONG PHIEUPH-THIEUTH
1232
charset[i++] = c++;
1233
// D800..DB7F; High Surrogates
1234
// DB80..DBFF; High Private Use Surrogates
1235
// DC00..DFFF; Low Surrogates
1236
// E000..F8FF; Private Use Area
1237
// F900..FAFF; CJK Compatibility Ideographs
1238
c = 0xf900; // from CJK COMPATIBILITY IDEOGRAPH-F900
1239
while (c <= 0xfa6d) // ..to CJK COMPATIBILITY IDEOGRAPH-FA6D
1240
charset[i++] = c++;
1241
c = 0xfa70; // from CJK COMPATIBILITY IDEOGRAPH-FA70
1242
while (c <= 0xfad9) // ..to CJK COMPATIBILITY IDEOGRAPH-FAD9
1243
charset[i++] = c++;
1244
// FB00..FB4F; Alphabetic Presentation Forms
1245
c = 0xfb00; // from LATIN SMALL LIGATURE FF
1246
while (c <= 0xfb06) // ..to LATIN SMALL LIGATURE ST
1247
charset[i++] = c++;
1248
c = 0xfb13; // from ARMENIAN SMALL LIGATURE MEN NOW
1249
while (c <= 0xfb17) // ..to ARMENIAN SMALL LIGATURE MEN XEH
1250
charset[i++] = c++;
1251
c = 0xfb1d; // from HEBREW LETTER YOD WITH HIRIQ
1252
while (c <= 0xfb36) // ..to HEBREW LETTER ZAYIN WITH DAGESH
1253
charset[i++] = c++;
1254
c = 0xfb38; // from HEBREW LETTER TET WITH DAGESH
1255
while (c <= 0xfb3c) // ..to HEBREW LETTER LAMED WITH DAGESH
1256
charset[i++] = c++;
1257
// Isolated Hebrew presentation forms: U+FB3E stands alone, followed by the
// pairs U+FB40..U+FB41 and U+FB43..U+FB44 (U+FB3D, U+FB3F, U+FB42 and U+FB45
// are unassigned).
charset[i++] = 0xfb3e; // HEBREW LETTER MEM WITH DAGESH
charset[i++] = 0xfb40; // HEBREW LETTER NUN WITH DAGESH
charset[i++] = 0xfb41; // HEBREW LETTER SAMEKH WITH DAGESH
charset[i++] = 0xfb43; // HEBREW LETTER FINAL PE WITH DAGESH
charset[i++] = 0xfb44; // HEBREW LETTER PE WITH DAGESH
1261
c = 0xfb46; // from HEBREW LETTER TSADI WITH DAGESH
1262
while (c <= 0xfb4f) // ..to HEBREW LIGATURE ALEF LAMED
1263
charset[i++] = c++;
1264
// FB50..FDFF; Arabic Presentation Forms-A
1265
c = 0xfb50; // from ARABIC LETTER ALEF WASLA ISOLATED FORM
1266
while (c <= 0xfbc1) // ..to ARABIC SYMBOL SMALL TAH BELOW
1267
charset[i++] = c++;
1268
c = 0xfbd3; // from ARABIC LETTER NG ISOLATED FORM
1269
while (c <= 0xfd3f) // ..to ORNATE RIGHT PARENTHESIS
1270
charset[i++] = c++;
1271
c = 0xfd50; // from ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM
1272
while (c <= 0xfd8f) // ..to ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
1273
charset[i++] = c++;
1274
c = 0xfd92; // from ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM
1275
while (c <= 0xfdc7) // ..to ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
1276
charset[i++] = c++;
1277
c = 0xfdf0; // from ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM
1278
while (c <= 0xfdfd) // ..to ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
1279
charset[i++] = c++;
1280
// FE00..FE0F; Variation Selectors
1281
c = 0xfe00; // from VARIATION SELECTOR-1
1282
while (c <= 0xfe0f) // ..to VARIATION SELECTOR-16
1283
charset[i++] = c++;
1284
// FE10..FE1F; Vertical Forms
1285
c = 0xfe10; // from PRESENTATION FORM FOR VERTICAL COMMA
1286
while (c <= 0xfe19) // ..to PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS
1287
charset[i++] = c++;
1288
// FE20..FE2F; Combining Half Marks
1289
c = 0xfe20; // from COMBINING LIGATURE LEFT HALF
1290
while (c <= 0xfe2f) // ..to COMBINING CYRILLIC TITLO RIGHT HALF
1291
charset[i++] = c++;
1292
// FE30..FE4F; CJK Compatibility Forms
1293
c = 0xfe30; // from PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
1294
while (c <= 0xfe4f) // ..to WAVY LOW LINE
1295
charset[i++] = c++;
1296
// FE50..FE6F; Small Form Variants
1297
// U+FE50..U+FE52 are three consecutive assigned code points; each one is
// listed so the middle code point is not skipped.
charset[i++] = 0xfe50; // SMALL COMMA
charset[i++] = 0xfe51; // SMALL IDEOGRAPHIC COMMA
charset[i++] = 0xfe52; // SMALL FULL STOP
1299
c = 0xfe54; // from SMALL SEMICOLON
1300
while (c <= 0xfe66) // ..to SMALL EQUALS SIGN
1301
charset[i++] = c++;
1302
c = 0xfe68; // from SMALL REVERSE SOLIDUS
1303
while (c <= 0xfe6b) // ..to SMALL COMMERCIAL AT
1304
charset[i++] = c++;
1305
// FE70..FEFF; Arabic Presentation Forms-B
1306
c = 0xfe70; // from ARABIC FATHATAN ISOLATED FORM
1307
while (c <= 0xfe74) // ..to ARABIC KASRATAN ISOLATED FORM
1308
charset[i++] = c++;
1309
c = 0xfe76; // from ARABIC FATHA ISOLATED FORM
1310
while (c <= 0xfefc) // ..to ARABIC LIGATURE LAM WITH ALEF FINAL FORM
1311
charset[i++] = c++;
1312
charset[i++] = 0xfeff; // ZERO WIDTH NO-BREAK SPACE
1313
// FF00..FFEF; Halfwidth and Fullwidth Forms
1314
c = 0xff01; // from FULLWIDTH EXCLAMATION MARK
1315
while (c <= 0xffbe) // ..to HALFWIDTH HANGUL LETTER HIEUH
1316
charset[i++] = c++;
1317
c = 0xffc2; // from HALFWIDTH HANGUL LETTER A
1318
while (c <= 0xffc7) // ..to HALFWIDTH HANGUL LETTER E
1319
charset[i++] = c++;
1320
c = 0xffca; // from HALFWIDTH HANGUL LETTER YEO
1321
while (c <= 0xffcf) // ..to HALFWIDTH HANGUL LETTER OE
1322
charset[i++] = c++;
1323
c = 0xffd2; // from HALFWIDTH HANGUL LETTER YO
1324
while (c <= 0xffd7) // ..to HALFWIDTH HANGUL LETTER YU
1325
charset[i++] = c++;
1326
// U+FFDA..U+FFDC are three consecutive assigned code points; each one is
// listed so the middle code point is not skipped.
charset[i++] = 0xffda; // HALFWIDTH HANGUL LETTER EU
charset[i++] = 0xffdb; // HALFWIDTH HANGUL LETTER YI
charset[i++] = 0xffdc; // HALFWIDTH HANGUL LETTER I
1328
c = 0xffe0; // from FULLWIDTH CENT SIGN
1329
while (c <= 0xffe6) // ..to FULLWIDTH WON SIGN
1330
charset[i++] = c++;
1331
c = 0xffe8; // from HALFWIDTH FORMS LIGHT VERTICAL
1332
while (c <= 0xffee) // ..to HALFWIDTH WHITE CIRCLE
1333
charset[i++] = c++;
1334
// FFF0..FFFF; Specials
1335
c = 0xfff9; // from INTERLINEAR ANNOTATION ANCHOR
1336
while (c <= 0xfffd) // ..to REPLACEMENT CHARACTER
1337
charset[i++] = c++;
1338
// 10000..1007F; Linear B Syllabary
1339
c = 0x10000; // from LINEAR B SYLLABLE B008 A
1340
while (c <= 0x1000b) // ..to LINEAR B SYLLABLE B046 JE
1341
charset[i++] = c++;
1342
c = 0x1000d; // from LINEAR B SYLLABLE B036 JO
1343
while (c <= 0x10026) // ..to LINEAR B SYLLABLE B032 QO
1344
charset[i++] = c++;
1345
c = 0x10028; // from LINEAR B SYLLABLE B060 RA
1346
while (c <= 0x1003a) // ..to LINEAR B SYLLABLE B042 WO
1347
charset[i++] = c++;
1348
charset[i++] = 0x1003c; // LINEAR B SYLLABLE B017 ZA
1349
charset[i++] = 0x1003d; // LINEAR B SYLLABLE B074 ZE
1350
c = 0x1003f; // from LINEAR B SYLLABLE B020 ZO
1351
while (c <= 0x1004d) // ..to LINEAR B SYLLABLE B091 TWO
1352
charset[i++] = c++;
1353
c = 0x10050; // from LINEAR B SYMBOL B018
1354
while (c <= 0x1005d) // ..to LINEAR B SYMBOL B089
1355
charset[i++] = c++;
1356
// 10080..100FF; Linear B Ideograms
1357
c = 0x10080; // from LINEAR B IDEOGRAM B100 MAN
1358
while (c <= 0x100fa) // ..to LINEAR B IDEOGRAM VESSEL B305
1359
charset[i++] = c++;
1360
// 10100..1013F; Aegean Numbers
1361
// U+10100..U+10102 are three consecutive assigned code points; each one is
// listed so the middle code point is not skipped.
charset[i++] = 0x10100; // AEGEAN WORD SEPARATOR LINE
charset[i++] = 0x10101; // AEGEAN WORD SEPARATOR DOT
charset[i++] = 0x10102; // AEGEAN CHECK MARK
1363
c = 0x10107; // from AEGEAN NUMBER ONE
1364
while (c <= 0x10133) // ..to AEGEAN NUMBER NINETY THOUSAND
1365
charset[i++] = c++;
1366
c = 0x10137; // from AEGEAN WEIGHT BASE UNIT
1367
while (c <= 0x1013f) // ..to AEGEAN MEASURE THIRD SUBUNIT
1368
charset[i++] = c++;
1369
// 10140..1018F; Ancient Greek Numbers
1370
c = 0x10140; // from GREEK ACROPHONIC ATTIC ONE QUARTER
1371
while (c <= 0x1018e) // ..to NOMISMA SIGN
1372
charset[i++] = c++;
1373
// 10190..101CF; Ancient Symbols
1374
c = 0x10190; // from ROMAN SEXTANS SIGN
1375
while (c <= 0x1019c) // ..to ASCIA SYMBOL
1376
charset[i++] = c++;
1377
charset[i++] = 0x101a0; // GREEK SYMBOL TAU RHO
1378
// 101D0..101FF; Phaistos Disc
1379
c = 0x101d0; // from PHAISTOS DISC SIGN PEDESTRIAN
1380
while (c <= 0x101fd) // ..to PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
1381
charset[i++] = c++;
1382
// 10280..1029F; Lycian
1383
c = 0x10280; // from LYCIAN LETTER A
1384
while (c <= 0x1029c) // ..to LYCIAN LETTER X
1385
charset[i++] = c++;
1386
// 102A0..102DF; Carian
1387
c = 0x102a0; // from CARIAN LETTER A
1388
while (c <= 0x102d0) // ..to CARIAN LETTER UUU3
1389
charset[i++] = c++;
1390
// 102E0..102FF; Coptic Epact Numbers
1391
c = 0x102e0; // from COPTIC EPACT THOUSANDS MARK
1392
while (c <= 0x102fb) // ..to COPTIC EPACT NUMBER NINE HUNDRED
1393
charset[i++] = c++;
1394
// 10300..1032F; Old Italic
1395
c = 0x10300; // from OLD ITALIC LETTER A
1396
while (c <= 0x10323) // ..to OLD ITALIC NUMERAL FIFTY
1397
charset[i++] = c++;
1398
// U+1032D..U+1032F are three consecutive assigned code points; each one is
// listed so the middle code point is not skipped.
charset[i++] = 0x1032d; // OLD ITALIC LETTER YE
charset[i++] = 0x1032e; // OLD ITALIC LETTER NORTHERN TSE
charset[i++] = 0x1032f; // OLD ITALIC LETTER SOUTHERN TSE
1400
// 10330..1034F; Gothic
1401
c = 0x10330; // from GOTHIC LETTER AHSA
1402
while (c <= 0x1034a) // ..to GOTHIC LETTER NINE HUNDRED
1403
charset[i++] = c++;
1404
// 10350..1037F; Old Permic
1405
c = 0x10350; // from OLD PERMIC LETTER AN
1406
while (c <= 0x1037a) // ..to COMBINING OLD PERMIC LETTER SII
1407
charset[i++] = c++;
1408
// 10380..1039F; Ugaritic
1409
c = 0x10380; // from UGARITIC LETTER ALPA
1410
while (c <= 0x1039d) // ..to UGARITIC LETTER SSU
1411
charset[i++] = c++;
1412
charset[i++] = 0x1039f; // UGARITIC WORD DIVIDER
1413
// 103A0..103DF; Old Persian
1414
c = 0x103a0; // from OLD PERSIAN SIGN A
1415
while (c <= 0x103c3) // ..to OLD PERSIAN SIGN HA
1416
charset[i++] = c++;
1417
c = 0x103c8; // from OLD PERSIAN SIGN AURAMAZDAA
1418
while (c <= 0x103d5) // ..to OLD PERSIAN NUMBER HUNDRED
1419
charset[i++] = c++;
1420
// 10400..1044F; Deseret
1421
c = 0x10400; // from DESERET CAPITAL LETTER LONG I
1422
while (c <= 0x1044f) // ..to DESERET SMALL LETTER EW
1423
charset[i++] = c++;
1424
// 10450..1047F; Shavian
1425
c = 0x10450; // from SHAVIAN LETTER PEEP
1426
while (c <= 0x1047f) // ..to SHAVIAN LETTER YEW
1427
charset[i++] = c++;
1428
// 10480..104AF; Osmanya
1429
c = 0x10480; // from OSMANYA LETTER ALEF
1430
while (c <= 0x1049d) // ..to OSMANYA LETTER OO
1431
charset[i++] = c++;
1432
c = 0x104a0; // from OSMANYA DIGIT ZERO
1433
while (c <= 0x104a9) // ..to OSMANYA DIGIT NINE
1434
charset[i++] = c++;
1435
// 104B0..104FF; Osage
1436
c = 0x104b0; // from OSAGE CAPITAL LETTER A
1437
while (c <= 0x104d3) // ..to OSAGE CAPITAL LETTER ZHA
1438
charset[i++] = c++;
1439
c = 0x104d8; // from OSAGE SMALL LETTER A
1440
while (c <= 0x104fb) // ..to OSAGE SMALL LETTER ZHA
1441
charset[i++] = c++;
1442
// 10500..1052F; Elbasan
1443
c = 0x10500; // from ELBASAN LETTER A
1444
while (c <= 0x10527) // ..to ELBASAN LETTER KHE
1445
charset[i++] = c++;
1446
// 10530..1056F; Caucasian Albanian
1447
c = 0x10530; // from CAUCASIAN ALBANIAN LETTER ALT
1448
while (c <= 0x10563) // ..to CAUCASIAN ALBANIAN LETTER KIW
1449
charset[i++] = c++;
1450
charset[i++] = 0x1056f; // CAUCASIAN ALBANIAN CITATION MARK
1451
// 10600..1077F; Linear A
1452
c = 0x10600; // from LINEAR A SIGN AB001
1453
while (c <= 0x10736) // ..to LINEAR A SIGN A664
1454
charset[i++] = c++;
1455
c = 0x10740; // from LINEAR A SIGN A701 A
1456
while (c <= 0x10755) // ..to LINEAR A SIGN A732 JE
1457
charset[i++] = c++;
1458
c = 0x10760; // from LINEAR A SIGN A800
1459
while (c <= 0x10767) // ..to LINEAR A SIGN A807
1460
charset[i++] = c++;
1461
// 10800..1083F; Cypriot Syllabary
1462
c = 0x10800; // from CYPRIOT SYLLABLE A
1463
while (c <= 0x10805) // ..to CYPRIOT SYLLABLE JA
1464
charset[i++] = c++;
1465
c = 0x1080a; // from CYPRIOT SYLLABLE KA
1466
while (c <= 0x10835) // ..to CYPRIOT SYLLABLE WO
1467
charset[i++] = c++;
1468
charset[i++] = 0x10837; // CYPRIOT SYLLABLE XA
1469
charset[i++] = 0x10838; // CYPRIOT SYLLABLE XE
1470
c = 0x1083c; // from CYPRIOT SYLLABLE ZA
1471
while (c <= 0x1083f) // ..to CYPRIOT SYLLABLE ZO
1472
charset[i++] = c++;
1473
// 10840..1085F; Imperial Aramaic
1474
c = 0x10840; // from IMPERIAL ARAMAIC LETTER ALEPH
1475
while (c <= 0x10855) // ..to IMPERIAL ARAMAIC LETTER TAW
1476
charset[i++] = c++;
1477
c = 0x10857; // from IMPERIAL ARAMAIC SECTION SIGN
1478
while (c <= 0x1085f) // ..to IMPERIAL ARAMAIC NUMBER TEN THOUSAND
1479
charset[i++] = c++;
1480
// 10860..1087F; Palmyrene
1481
c = 0x10860; // from PALMYRENE LETTER ALEPH
1482
while (c <= 0x1087f) // ..to PALMYRENE NUMBER TWENTY
1483
charset[i++] = c++;
1484
// 10880..108AF; Nabataean
1485
c = 0x10880; // from NABATAEAN LETTER FINAL ALEPH
1486
while (c <= 0x1089e) // ..to NABATAEAN LETTER TAW
1487
charset[i++] = c++;
1488
c = 0x108a7; // from NABATAEAN NUMBER ONE
1489
while (c <= 0x108af) // ..to NABATAEAN NUMBER ONE HUNDRED
1490
charset[i++] = c++;
1491
// 108E0..108FF; Hatran
1492
c = 0x108e0; // from HATRAN LETTER ALEPH
1493
while (c <= 0x108f2) // ..to HATRAN LETTER QOPH
1494
charset[i++] = c++;
1495
charset[i++] = 0x108f4; // HATRAN LETTER SHIN
1496
charset[i++] = 0x108f5; // HATRAN LETTER TAW
1497
c = 0x108fb; // from HATRAN NUMBER ONE
1498
while (c <= 0x108ff) // ..to HATRAN NUMBER ONE HUNDRED
1499
charset[i++] = c++;
1500
// 10900..1091F; Phoenician
1501
c = 0x10900; // from PHOENICIAN LETTER ALF
1502
while (c <= 0x1091b) // ..to PHOENICIAN NUMBER THREE
1503
charset[i++] = c++;
1504
charset[i++] = 0x1091f; // PHOENICIAN WORD SEPARATOR
1505
// 10920..1093F; Lydian
1506
c = 0x10920; // from LYDIAN LETTER A
1507
while (c <= 0x10939) // ..to LYDIAN LETTER C
1508
charset[i++] = c++;
1509
charset[i++] = 0x1093f; // LYDIAN TRIANGULAR MARK
1510
// 10980..1099F; Meroitic Hieroglyphs
1511
c = 0x10980; // from MEROITIC HIEROGLYPHIC LETTER A
1512
while (c <= 0x1099f) // ..to MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2
1513
charset[i++] = c++;
1514
// 109A0..109FF; Meroitic Cursive
1515
c = 0x109a0; // from MEROITIC CURSIVE LETTER A
1516
while (c <= 0x109b7) // ..to MEROITIC CURSIVE LETTER DA
1517
charset[i++] = c++;
1518
c = 0x109bc; // from MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS
1519
while (c <= 0x109cf) // ..to MEROITIC CURSIVE NUMBER SEVENTY
1520
charset[i++] = c++;
1521
c = 0x109d2; // from MEROITIC CURSIVE NUMBER ONE HUNDRED
1522
while (c <= 0x109ff) // ..to MEROITIC CURSIVE FRACTION TEN TWELFTHS
1523
charset[i++] = c++;
1524
// 10A00..10A5F; Kharoshthi
1525
c = 0x10a00; // from KHAROSHTHI LETTER A
1526
while (c <= 0x10a03) // ..to KHAROSHTHI VOWEL SIGN VOCALIC R
1527
charset[i++] = c++;
1528
charset[i++] = 0x10a05; // KHAROSHTHI VOWEL SIGN E
1529
charset[i++] = 0x10a06; // KHAROSHTHI VOWEL SIGN O
1530
c = 0x10a0c; // from KHAROSHTHI VOWEL LENGTH MARK
1531
while (c <= 0x10a13) // ..to KHAROSHTHI LETTER GHA
1532
charset[i++] = c++;
1533
charset[i++] = 0x10a15; // KHAROSHTHI LETTER CA
1534
charset[i++] = 0x10a17; // KHAROSHTHI LETTER JA
1535
c = 0x10a19; // from KHAROSHTHI LETTER NYA
1536
while (c <= 0x10a35) // ..to KHAROSHTHI LETTER VHA
1537
charset[i++] = c++;
1538
charset[i++] = 0x10a38; // KHAROSHTHI SIGN BAR ABOVE
1539
charset[i++] = 0x10a3a; // KHAROSHTHI SIGN DOT BELOW
1540
c = 0x10a3f; // from KHAROSHTHI VIRAMA
1541
while (c <= 0x10a48) // ..to KHAROSHTHI FRACTION ONE HALF
1542
charset[i++] = c++;
1543
c = 0x10a50; // from KHAROSHTHI PUNCTUATION DOT
1544
while (c <= 0x10a58) // ..to KHAROSHTHI PUNCTUATION LINES
1545
charset[i++] = c++;
1546
// 10A60..10A7F; Old South Arabian
1547
c = 0x10a60; // from OLD SOUTH ARABIAN LETTER HE
1548
while (c <= 0x10a7f) // ..to OLD SOUTH ARABIAN NUMERIC INDICATOR
1549
charset[i++] = c++;
1550
// 10A80..10A9F; Old North Arabian
1551
c = 0x10a80; // from OLD NORTH ARABIAN LETTER HEH
1552
while (c <= 0x10a9f) // ..to OLD NORTH ARABIAN NUMBER TWENTY
1553
charset[i++] = c++;
1554
// 10AC0..10AFF; Manichaean
1555
c = 0x10ac0; // from MANICHAEAN LETTER ALEPH
1556
while (c <= 0x10ae6) // ..to MANICHAEAN ABBREVIATION MARK BELOW
1557
charset[i++] = c++;
1558
c = 0x10aeb; // from MANICHAEAN NUMBER ONE
1559
while (c <= 0x10af6) // ..to MANICHAEAN PUNCTUATION LINE FILLER
1560
charset[i++] = c++;
1561
// 10B00..10B3F; Avestan
1562
c = 0x10b00; // from AVESTAN LETTER A
1563
while (c <= 0x10b35) // ..to AVESTAN LETTER HE
1564
charset[i++] = c++;
1565
c = 0x10b39; // from AVESTAN ABBREVIATION MARK
1566
while (c <= 0x10b3f) // ..to LARGE ONE RING OVER TWO RINGS PUNCTUATION
1567
charset[i++] = c++;
1568
// 10B40..10B5F; Inscriptional Parthian
1569
c = 0x10b40; // from INSCRIPTIONAL PARTHIAN LETTER ALEPH
1570
while (c <= 0x10b55) // ..to INSCRIPTIONAL PARTHIAN LETTER TAW
1571
charset[i++] = c++;
1572
c = 0x10b58; // from INSCRIPTIONAL PARTHIAN NUMBER ONE
1573
while (c <= 0x10b5f) // ..to INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND
1574
charset[i++] = c++;
1575
// 10B60..10B7F; Inscriptional Pahlavi
1576
c = 0x10b60; // from INSCRIPTIONAL PAHLAVI LETTER ALEPH
1577
while (c <= 0x10b72) // ..to INSCRIPTIONAL PAHLAVI LETTER TAW
1578
charset[i++] = c++;
1579
c = 0x10b78; // from INSCRIPTIONAL PAHLAVI NUMBER ONE
1580
while (c <= 0x10b7f) // ..to INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND
1581
charset[i++] = c++;
1582
// 10B80..10BAF; Psalter Pahlavi
1583
c = 0x10b80; // from PSALTER PAHLAVI LETTER ALEPH
1584
while (c <= 0x10b91) // ..to PSALTER PAHLAVI LETTER TAW
1585
charset[i++] = c++;
1586
c = 0x10b99; // from PSALTER PAHLAVI SECTION MARK
1587
while (c <= 0x10b9c) // ..to PSALTER PAHLAVI FOUR DOTS WITH DOT
1588
charset[i++] = c++;
1589
c = 0x10ba9; // from PSALTER PAHLAVI NUMBER ONE
1590
while (c <= 0x10baf) // ..to PSALTER PAHLAVI NUMBER ONE HUNDRED
1591
charset[i++] = c++;
1592
// 10C00..10C4F; Old Turkic
1593
c = 0x10c00; // from OLD TURKIC LETTER ORKHON A
1594
while (c <= 0x10c48) // ..to OLD TURKIC LETTER ORKHON BASH
1595
charset[i++] = c++;
1596
// 10C80..10CFF; Old Hungarian
1597
c = 0x10c80; // from OLD HUNGARIAN CAPITAL LETTER A
1598
while (c <= 0x10cb2) // ..to OLD HUNGARIAN CAPITAL LETTER US
1599
charset[i++] = c++;
1600
c = 0x10cc0; // from OLD HUNGARIAN SMALL LETTER A
1601
while (c <= 0x10cf2) // ..to OLD HUNGARIAN SMALL LETTER US
1602
charset[i++] = c++;
1603
c = 0x10cfa; // from OLD HUNGARIAN NUMBER ONE
1604
while (c <= 0x10cff) // ..to OLD HUNGARIAN NUMBER ONE THOUSAND
1605
charset[i++] = c++;
1606
// 10D00..10D3F; Hanifi Rohingya
1607
c = 0x10d00; // from HANIFI ROHINGYA LETTER A
1608
while (c <= 0x10d27) // ..to HANIFI ROHINGYA SIGN TASSI
1609
charset[i++] = c++;
1610
c = 0x10d30; // from HANIFI ROHINGYA DIGIT ZERO
1611
while (c <= 0x10d39) // ..to HANIFI ROHINGYA DIGIT NINE
1612
charset[i++] = c++;
1613
// 10E60..10E7F; Rumi Numeral Symbols
1614
c = 0x10e60; // from RUMI DIGIT ONE
1615
while (c <= 0x10e7e) // ..to RUMI FRACTION TWO THIRDS
1616
charset[i++] = c++;
1617
// 10E80..10EBF; Yezidi
1618
c = 0x10e80; // from YEZIDI LETTER ELIF
1619
while (c <= 0x10ea9) // ..to YEZIDI LETTER ET
1620
charset[i++] = c++;
1621
charset[i++] = 0x10eab; // YEZIDI COMBINING HAMZA MARK
1622
charset[i++] = 0x10ead; // YEZIDI HYPHENATION MARK
1623
charset[i++] = 0x10eb0; // YEZIDI LETTER LAM WITH DOT ABOVE
1624
charset[i++] = 0x10eb1; // YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
1625
// 10F00..10F2F; Old Sogdian
1626
c = 0x10f00; // from OLD SOGDIAN LETTER ALEPH
1627
while (c <= 0x10f27) // ..to OLD SOGDIAN LIGATURE AYIN-DALETH
1628
charset[i++] = c++;
1629
// 10F30..10F6F; Sogdian
1630
c = 0x10f30; // from SOGDIAN LETTER ALEPH
1631
while (c <= 0x10f59) // ..to SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT
1632
charset[i++] = c++;
1633
// 10FB0..10FDF; Chorasmian
1634
c = 0x10fb0; // from CHORASMIAN LETTER ALEPH
1635
while (c <= 0x10fcb) // ..to CHORASMIAN NUMBER ONE HUNDRED
1636
charset[i++] = c++;
1637
// 10FE0..10FFF; Elymaic
1638
c = 0x10fe0; // from ELYMAIC LETTER ALEPH
1639
while (c <= 0x10ff6) // ..to ELYMAIC LIGATURE ZAYIN-YODH
1640
charset[i++] = c++;
1641
// 11000..1107F; Brahmi
1642
c = 0x11000; // from BRAHMI SIGN CANDRABINDU
1643
while (c <= 0x1104d) // ..to BRAHMI PUNCTUATION LOTUS
1644
charset[i++] = c++;
1645
c = 0x11052; // from BRAHMI NUMBER ONE
1646
while (c <= 0x1106f) // ..to BRAHMI DIGIT NINE
1647
charset[i++] = c++;
1648
charset[i++] = 0x1107f; // BRAHMI NUMBER JOINER
1649
// 11080..110CF; Kaithi
1650
c = 0x11080; // from KAITHI SIGN CANDRABINDU
1651
while (c <= 0x110c1) // ..to KAITHI DOUBLE DANDA
1652
charset[i++] = c++;
1653
charset[i++] = 0x110cd; // KAITHI NUMBER SIGN ABOVE
1654
// 110D0..110FF; Sora Sompeng
1655
c = 0x110d0; // from SORA SOMPENG LETTER SAH
1656
while (c <= 0x110e8) // ..to SORA SOMPENG LETTER MAE
1657
charset[i++] = c++;
1658
c = 0x110f0; // from SORA SOMPENG DIGIT ZERO
1659
while (c <= 0x110f9) // ..to SORA SOMPENG DIGIT NINE
1660
charset[i++] = c++;
1661
// 11100..1114F; Chakma
1662
c = 0x11100; // from CHAKMA SIGN CANDRABINDU
1663
while (c <= 0x11134) // ..to CHAKMA MAAYYAA
1664
charset[i++] = c++;
1665
c = 0x11136; // from CHAKMA DIGIT ZERO
1666
while (c <= 0x11147) // ..to CHAKMA LETTER VAA
1667
charset[i++] = c++;
1668
// 11150..1117F; Mahajani
1669
c = 0x11150; // from MAHAJANI LETTER A
1670
while (c <= 0x11176) // ..to MAHAJANI LIGATURE SHRI
1671
charset[i++] = c++;
1672
// 11180..111DF; Sharada
1673
c = 0x11180; // from SHARADA SIGN CANDRABINDU
1674
while (c <= 0x111df) // ..to SHARADA SECTION MARK-2
1675
charset[i++] = c++;
1676
// 111E0..111FF; Sinhala Archaic Numbers
1677
c = 0x111e1; // from SINHALA ARCHAIC DIGIT ONE
1678
while (c <= 0x111f4) // ..to SINHALA ARCHAIC NUMBER ONE THOUSAND
1679
charset[i++] = c++;
1680
// 11200..1124F; Khojki
1681
c = 0x11200; // from KHOJKI LETTER A
1682
while (c <= 0x11211) // ..to KHOJKI LETTER JJA
1683
charset[i++] = c++;
1684
c = 0x11213; // from KHOJKI LETTER NYA
1685
while (c <= 0x1123e) // ..to KHOJKI SIGN SUKUN
1686
charset[i++] = c++;
1687
// 11280..112AF; Multani
1688
c = 0x11280; // from MULTANI LETTER A
1689
while (c <= 0x11286) // ..to MULTANI LETTER GA
1690
charset[i++] = c++;
1691
c = 0x1128a; // from MULTANI LETTER CA
1692
while (c <= 0x1128d) // ..to MULTANI LETTER JJA
1693
charset[i++] = c++;
1694
c = 0x1128f; // from MULTANI LETTER NYA
1695
while (c <= 0x1129d) // ..to MULTANI LETTER BA
1696
charset[i++] = c++;
1697
c = 0x1129f; // from MULTANI LETTER BHA
1698
while (c <= 0x112a9) // ..to MULTANI SECTION MARK
1699
charset[i++] = c++;
1700
// 112B0..112FF; Khudawadi
1701
c = 0x112b0; // from KHUDAWADI LETTER A
1702
while (c <= 0x112ea) // ..to KHUDAWADI SIGN VIRAMA
1703
charset[i++] = c++;
1704
c = 0x112f0; // from KHUDAWADI DIGIT ZERO
1705
while (c <= 0x112f9) // ..to KHUDAWADI DIGIT NINE
1706
charset[i++] = c++;
1707
// 11300..1137F; Grantha
1708
c = 0x11300; // from GRANTHA SIGN COMBINING ANUSVARA ABOVE
1709
while (c <= 0x11303) // ..to GRANTHA SIGN VISARGA
1710
charset[i++] = c++;
1711
c = 0x11305; // from GRANTHA LETTER A
1712
while (c <= 0x1130c) // ..to GRANTHA LETTER VOCALIC L
1713
charset[i++] = c++;
1714
charset[i++] = 0x1130f; // GRANTHA LETTER EE
1715
charset[i++] = 0x11310; // GRANTHA LETTER AI
1716
c = 0x11313; // from GRANTHA LETTER OO
1717
while (c <= 0x11328) // ..to GRANTHA LETTER NA
1718
charset[i++] = c++;
1719
c = 0x1132a; // from GRANTHA LETTER PA
1720
while (c <= 0x11330) // ..to GRANTHA LETTER RA
1721
charset[i++] = c++;
1722
charset[i++] = 0x11332; // GRANTHA LETTER LA
1723
charset[i++] = 0x11333; // GRANTHA LETTER LLA
1724
c = 0x11335; // from GRANTHA LETTER VA
1725
while (c <= 0x11339) // ..to GRANTHA LETTER HA
1726
charset[i++] = c++;
1727
c = 0x1133b; // from COMBINING BINDU BELOW
1728
while (c <= 0x11344) // ..to GRANTHA VOWEL SIGN VOCALIC RR
1729
charset[i++] = c++;
1730
charset[i++] = 0x11347; // GRANTHA VOWEL SIGN EE
1731
charset[i++] = 0x11348; // GRANTHA VOWEL SIGN AI
1732
charset[i++] = 0x1134b; // GRANTHA VOWEL SIGN OO
1733
charset[i++] = 0x1134d; // GRANTHA SIGN VIRAMA
1734
c = 0x1135d; // from GRANTHA SIGN PLUTA
1735
while (c <= 0x11363) // ..to GRANTHA VOWEL SIGN VOCALIC LL
1736
charset[i++] = c++;
1737
c = 0x11366; // from COMBINING GRANTHA DIGIT ZERO
1738
while (c <= 0x1136c) // ..to COMBINING GRANTHA DIGIT SIX
1739
charset[i++] = c++;
1740
c = 0x11370; // from COMBINING GRANTHA LETTER A
1741
while (c <= 0x11374) // ..to COMBINING GRANTHA LETTER PA
1742
charset[i++] = c++;
1743
// 11400..1147F; Newa
1744
c = 0x11400; // from NEWA LETTER A
1745
while (c <= 0x1145b) // ..to NEWA PLACEHOLDER MARK
1746
charset[i++] = c++;
1747
c = 0x1145d; // from NEWA INSERTION SIGN
1748
while (c <= 0x11461) // ..to NEWA SIGN UPADHMANIYA
1749
charset[i++] = c++;
1750
// 11480..114DF; Tirhuta
1751
c = 0x11480; // from TIRHUTA ANJI
1752
while (c <= 0x114c7) // ..to TIRHUTA OM
1753
charset[i++] = c++;
1754
c = 0x114d0; // from TIRHUTA DIGIT ZERO
1755
while (c <= 0x114d9) // ..to TIRHUTA DIGIT NINE
1756
charset[i++] = c++;
1757
// 11580..115FF; Siddham
1758
c = 0x11580; // from SIDDHAM LETTER A
1759
while (c <= 0x115b5) // ..to SIDDHAM VOWEL SIGN VOCALIC RR
1760
charset[i++] = c++;
1761
c = 0x115b8; // from SIDDHAM VOWEL SIGN E
1762
while (c <= 0x115dd) // ..to SIDDHAM VOWEL SIGN ALTERNATE UU
1763
charset[i++] = c++;
1764
// 11600..1165F; Modi
1765
c = 0x11600; // from MODI LETTER A
1766
while (c <= 0x11644) // ..to MODI SIGN HUVA
1767
charset[i++] = c++;
1768
c = 0x11650; // from MODI DIGIT ZERO
1769
while (c <= 0x11659) // ..to MODI DIGIT NINE
1770
charset[i++] = c++;
1771
// 11660..1167F; Mongolian Supplement
1772
c = 0x11660; // from MONGOLIAN BIRGA WITH ORNAMENT
1773
while (c <= 0x1166c) // ..to MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT
1774
charset[i++] = c++;
1775
// 11680..116CF; Takri
1776
c = 0x11680; // from TAKRI LETTER A
1777
while (c <= 0x116b8) // ..to TAKRI LETTER ARCHAIC KHA
1778
charset[i++] = c++;
1779
c = 0x116c0; // from TAKRI DIGIT ZERO
1780
while (c <= 0x116c9) // ..to TAKRI DIGIT NINE
1781
charset[i++] = c++;
1782
// 11700..1173F; Ahom
1783
c = 0x11700; // from AHOM LETTER KA
1784
while (c <= 0x1171a) // ..to AHOM LETTER ALTERNATE BA
1785
charset[i++] = c++;
1786
c = 0x1171d; // from AHOM CONSONANT SIGN MEDIAL LA
1787
while (c <= 0x1172b) // ..to AHOM SIGN KILLER
1788
charset[i++] = c++;
1789
c = 0x11730; // from AHOM DIGIT ZERO
1790
while (c <= 0x1173f) // ..to AHOM SYMBOL VI
1791
charset[i++] = c++;
1792
// 11800..1184F; Dogra
1793
c = 0x11800; // from DOGRA LETTER A
1794
while (c <= 0x1183b) // ..to DOGRA ABBREVIATION SIGN
1795
charset[i++] = c++;
1796
// 118A0..118FF; Warang Citi
1797
c = 0x118a0; // from WARANG CITI CAPITAL LETTER NGAA
1798
while (c <= 0x118f2) // ..to WARANG CITI NUMBER NINETY
1799
charset[i++] = c++;
1800
charset[i++] = 0x118ff; // WARANG CITI OM
1801
// 11900..1195F; Dives Akuru
1802
c = 0x11900; // from DIVES AKURU LETTER A
1803
while (c <= 0x11906) // ..to DIVES AKURU LETTER E
1804
charset[i++] = c++;
1805
c = 0x1190c; // from DIVES AKURU LETTER KA
1806
while (c <= 0x11913) // ..to DIVES AKURU LETTER JA
1807
charset[i++] = c++;
1808
charset[i++] = 0x11915; // DIVES AKURU LETTER NYA
1809
charset[i++] = 0x11916; // DIVES AKURU LETTER TTA
1810
c = 0x11918; // from DIVES AKURU LETTER DDA
1811
while (c <= 0x11935) // ..to DIVES AKURU VOWEL SIGN E
1812
charset[i++] = c++;
1813
charset[i++] = 0x11937; // DIVES AKURU VOWEL SIGN AI
1814
charset[i++] = 0x11938; // DIVES AKURU VOWEL SIGN O
1815
c = 0x1193b; // from DIVES AKURU SIGN ANUSVARA
1816
while (c <= 0x11946) // ..to DIVES AKURU END OF TEXT MARK
1817
charset[i++] = c++;
1818
c = 0x11950; // from DIVES AKURU DIGIT ZERO
1819
while (c <= 0x11959) // ..to DIVES AKURU DIGIT NINE
1820
charset[i++] = c++;
1821
// 119A0..119FF; Nandinagari
1822
c = 0x119a0; // from NANDINAGARI LETTER A
1823
while (c <= 0x119a7) // ..to NANDINAGARI LETTER VOCALIC RR
1824
charset[i++] = c++;
1825
c = 0x119aa; // from NANDINAGARI LETTER E
1826
while (c <= 0x119d7) // ..to NANDINAGARI VOWEL SIGN VOCALIC RR
1827
charset[i++] = c++;
1828
c = 0x119da; // from NANDINAGARI VOWEL SIGN E
1829
while (c <= 0x119e4) // ..to NANDINAGARI VOWEL SIGN PRISHTHAMATRA E
1830
charset[i++] = c++;
1831
// 11A00..11A4F; Zanabazar Square
1832
c = 0x11a00; // from ZANABAZAR SQUARE LETTER A
1833
while (c <= 0x11a47) // ..to ZANABAZAR SQUARE SUBJOINER
1834
charset[i++] = c++;
1835
// 11A50..11AAF; Soyombo
1836
c = 0x11a50; // from SOYOMBO LETTER A
1837
while (c <= 0x11aa2) // ..to SOYOMBO TERMINAL MARK-2
1838
charset[i++] = c++;
1839
// 11AC0..11AFF; Pau Cin Hau
1840
c = 0x11ac0; // from PAU CIN HAU LETTER PA
1841
while (c <= 0x11af8) // ..to PAU CIN HAU GLOTTAL STOP FINAL
1842
charset[i++] = c++;
1843
// 11C00..11C6F; Bhaiksuki
1844
c = 0x11c00; // from BHAIKSUKI LETTER A
1845
while (c <= 0x11c08) // ..to BHAIKSUKI LETTER VOCALIC L
1846
charset[i++] = c++;
1847
c = 0x11c0a; // from BHAIKSUKI LETTER E
1848
while (c <= 0x11c36) // ..to BHAIKSUKI VOWEL SIGN VOCALIC L
1849
charset[i++] = c++;
1850
c = 0x11c38; // from BHAIKSUKI VOWEL SIGN E
1851
while (c <= 0x11c45) // ..to BHAIKSUKI GAP FILLER-2
1852
charset[i++] = c++;
1853
c = 0x11c50; // from BHAIKSUKI DIGIT ZERO
1854
while (c <= 0x11c6c) // ..to BHAIKSUKI HUNDREDS UNIT MARK
1855
charset[i++] = c++;
1856
// 11C70..11CBF; Marchen
1857
c = 0x11c70; // from MARCHEN HEAD MARK
1858
while (c <= 0x11c8f) // ..to MARCHEN LETTER A
1859
charset[i++] = c++;
1860
c = 0x11c92; // from MARCHEN SUBJOINED LETTER KA
1861
while (c <= 0x11ca7) // ..to MARCHEN SUBJOINED LETTER ZA
1862
charset[i++] = c++;
1863
c = 0x11ca9; // from MARCHEN SUBJOINED LETTER YA
1864
while (c <= 0x11cb6) // ..to MARCHEN SIGN CANDRABINDU
1865
charset[i++] = c++;
1866
// 11D00..11D5F; Masaram Gondi
1867
c = 0x11d00; // from MASARAM GONDI LETTER A
1868
while (c <= 0x11d06) // ..to MASARAM GONDI LETTER E
1869
charset[i++] = c++;
1870
charset[i++] = 0x11d08; // MASARAM GONDI LETTER AI
1871
charset[i++] = 0x11d09; // MASARAM GONDI LETTER O
1872
c = 0x11d0b; // from MASARAM GONDI LETTER AU
1873
while (c <= 0x11d36) // ..to MASARAM GONDI VOWEL SIGN VOCALIC R
1874
charset[i++] = c++;
1875
charset[i++] = 0x11d3c; // MASARAM GONDI VOWEL SIGN AI
1876
charset[i++] = 0x11d3d; // MASARAM GONDI VOWEL SIGN O
1877
c = 0x11d3f; // from MASARAM GONDI VOWEL SIGN AU
1878
while (c <= 0x11d47) // ..to MASARAM GONDI RA-KARA
1879
charset[i++] = c++;
1880
c = 0x11d50; // from MASARAM GONDI DIGIT ZERO
1881
while (c <= 0x11d59) // ..to MASARAM GONDI DIGIT NINE
1882
charset[i++] = c++;
1883
// 11D60..11DAF; Gunjala Gondi
1884
c = 0x11d60; // from GUNJALA GONDI LETTER A
1885
while (c <= 0x11d65) // ..to GUNJALA GONDI LETTER UU
1886
charset[i++] = c++;
1887
charset[i++] = 0x11d67; // GUNJALA GONDI LETTER EE
1888
charset[i++] = 0x11d68; // GUNJALA GONDI LETTER AI
1889
c = 0x11d6a; // from GUNJALA GONDI LETTER OO
1890
while (c <= 0x11d8e) // ..to GUNJALA GONDI VOWEL SIGN UU
1891
charset[i++] = c++;
1892
charset[i++] = 0x11d90; // GUNJALA GONDI VOWEL SIGN EE
1893
charset[i++] = 0x11d91; // GUNJALA GONDI VOWEL SIGN AI
1894
c = 0x11d93; // from GUNJALA GONDI VOWEL SIGN OO
1895
while (c <= 0x11d98) // ..to GUNJALA GONDI OM
1896
charset[i++] = c++;
1897
c = 0x11da0; // from GUNJALA GONDI DIGIT ZERO
1898
while (c <= 0x11da9) // ..to GUNJALA GONDI DIGIT NINE
1899
charset[i++] = c++;
1900
// 11EE0..11EFF; Makasar
1901
c = 0x11ee0; // from MAKASAR LETTER KA
1902
while (c <= 0x11ef8) // ..to MAKASAR END OF SECTION
1903
charset[i++] = c++;
1904
// 11FB0..11FBF; Lisu Supplement
1905
charset[i++] = 0x11fb0; // LISU LETTER YHA
1906
// 11FC0..11FFF; Tamil Supplement
1907
c = 0x11fc0; // from TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH
1908
while (c <= 0x11ff1) // ..to TAMIL SIGN VAKAIYARAA
1909
charset[i++] = c++;
1910
charset[i++] = 0x11fff; // TAMIL PUNCTUATION END OF TEXT
1911
// 12000..123FF; Cuneiform
1912
c = 0x12000; // from CUNEIFORM SIGN A
1913
while (c <= 0x12399) // ..to CUNEIFORM SIGN U U
1914
charset[i++] = c++;
1915
// 12400..1247F; Cuneiform Numbers and Punctuation
1916
c = 0x12400; // from CUNEIFORM NUMERIC SIGN TWO ASH
1917
while (c <= 0x1246e) // ..to CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM
1918
charset[i++] = c++;
1919
c = 0x12470; // from CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER
1920
while (c <= 0x12474) // ..to CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON
1921
charset[i++] = c++;
1922
// 12480..1254F; Early Dynastic Cuneiform
1923
c = 0x12480; // from CUNEIFORM SIGN AB TIMES NUN TENU
1924
while (c <= 0x12543) // ..to CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU
1925
charset[i++] = c++;
1926
// 13000..1342F; Egyptian Hieroglyphs
1927
c = 0x13000; // from EGYPTIAN HIEROGLYPH A001
1928
while (c <= 0x1342e) // ..to EGYPTIAN HIEROGLYPH AA032
1929
charset[i++] = c++;
1930
// 13430..1343F; Egyptian Hieroglyph Format Controls
1931
c = 0x13430; // from EGYPTIAN HIEROGLYPH VERTICAL JOINER
1932
while (c <= 0x13438) // ..to EGYPTIAN HIEROGLYPH END SEGMENT
1933
charset[i++] = c++;
1934
// 14400..1467F; Anatolian Hieroglyphs
1935
c = 0x14400; // from ANATOLIAN HIEROGLYPH A001
1936
while (c <= 0x14646) // ..to ANATOLIAN HIEROGLYPH A530
1937
charset[i++] = c++;
1938
// 16800..16A3F; Bamum Supplement
1939
c = 0x16800; // from BAMUM LETTER PHASE-A NGKUE MFON
1940
while (c <= 0x16a38) // ..to BAMUM LETTER PHASE-F VUEQ
1941
charset[i++] = c++;
1942
// 16A40..16A6F; Mro
1943
c = 0x16a40; // from MRO LETTER TA
1944
while (c <= 0x16a5e) // ..to MRO LETTER TEK
1945
charset[i++] = c++;
1946
c = 0x16a60; // from MRO DIGIT ZERO
1947
while (c <= 0x16a69) // ..to MRO DIGIT NINE
1948
charset[i++] = c++;
1949
charset[i++] = 0x16a6e; // MRO DANDA
1950
charset[i++] = 0x16a6f; // MRO DOUBLE DANDA
1951
// 16AD0..16AFF; Bassa Vah
1952
c = 0x16ad0; // from BASSA VAH LETTER ENNI
1953
while (c <= 0x16aed) // ..to BASSA VAH LETTER I
1954
charset[i++] = c++;
1955
c = 0x16af0; // from BASSA VAH COMBINING HIGH TONE
1956
while (c <= 0x16af5) // ..to BASSA VAH FULL STOP
1957
charset[i++] = c++;
1958
// 16B00..16B8F; Pahawh Hmong
1959
c = 0x16b00; // from PAHAWH HMONG VOWEL KEEB
1960
while (c <= 0x16b45) // ..to PAHAWH HMONG SIGN CIM TSOV ROG
1961
charset[i++] = c++;
1962
c = 0x16b50; // from PAHAWH HMONG DIGIT ZERO
1963
while (c <= 0x16b59) // ..to PAHAWH HMONG DIGIT NINE
1964
charset[i++] = c++;
1965
c = 0x16b5b; // from PAHAWH HMONG NUMBER TENS
1966
while (c <= 0x16b61) // ..to PAHAWH HMONG NUMBER TRILLIONS
1967
charset[i++] = c++;
1968
c = 0x16b63; // from PAHAWH HMONG SIGN VOS LUB
1969
while (c <= 0x16b77) // ..to PAHAWH HMONG SIGN CIM NRES TOS
1970
charset[i++] = c++;
1971
c = 0x16b7d; // from PAHAWH HMONG CLAN SIGN TSHEEJ
1972
while (c <= 0x16b8f) // ..to PAHAWH HMONG CLAN SIGN VWJ
1973
charset[i++] = c++;
1974
// 16E40..16E9F; Medefaidrin
1975
c = 0x16e40; // from MEDEFAIDRIN CAPITAL LETTER M
1976
while (c <= 0x16e9a) // ..to MEDEFAIDRIN EXCLAMATION OH
1977
charset[i++] = c++;
1978
// 16F00..16F9F; Miao
1979
c = 0x16f00; // from MIAO LETTER PA
1980
while (c <= 0x16f4a) // ..to MIAO LETTER RTE
1981
charset[i++] = c++;
1982
c = 0x16f4f; // from MIAO SIGN CONSONANT MODIFIER BAR
1983
while (c <= 0x16f87) // ..to MIAO VOWEL SIGN UI
1984
charset[i++] = c++;
1985
c = 0x16f8f; // from MIAO TONE RIGHT
1986
while (c <= 0x16f9f) // ..to MIAO LETTER REFORMED TONE-8
1987
charset[i++] = c++;
1988
// 16FE0..16FFF; Ideographic Symbols and Punctuation
1989
c = 0x16fe0; // from TANGUT ITERATION MARK
1990
while (c <= 0x16fe4) // ..to KHITAN SMALL SCRIPT FILLER
1991
charset[i++] = c++;
1992
charset[i++] = 0x16ff0; // VIETNAMESE ALTERNATE READING MARK CA
1993
charset[i++] = 0x16ff1; // VIETNAMESE ALTERNATE READING MARK NHAY
1994
// 17000..187FF; Tangut
1995
c = 0x17000; // from <Tangut Ideograph, First>
1996
while (c <= 0x187f7) // ..to <Tangut Ideograph, Last>
1997
charset[i++] = c++;
1998
// 18800..18AFF; Tangut Components
1999
c = 0x18800; // from TANGUT COMPONENT-001
2000
while (c <= 0x18aff) // ..to TANGUT COMPONENT-768
2001
charset[i++] = c++;
2002
// 18B00..18CFF; Khitan Small Script
2003
c = 0x18b00; // from KHITAN SMALL SCRIPT CHARACTER-18B00
2004
while (c <= 0x18cd5) // ..to KHITAN SMALL SCRIPT CHARACTER-18CD5
2005
charset[i++] = c++;
2006
// 18D00..18D8F; Tangut Supplement
2007
c = 0x18d00; // from <Tangut Ideograph Supplement, First>
2008
while (c <= 0x18d08) // ..to <Tangut Ideograph Supplement, Last>
2009
charset[i++] = c++;
2010
// 1B000..1B0FF; Kana Supplement
2011
c = 0x1b000; // from KATAKANA LETTER ARCHAIC E
2012
while (c <= 0x1b0ff) // ..to HENTAIGANA LETTER RE-2
2013
charset[i++] = c++;
2014
// 1B100..1B12F; Kana Extended-A
2015
c = 0x1b100; // from HENTAIGANA LETTER RE-3
2016
while (c <= 0x1b11e) // ..to HENTAIGANA LETTER N-MU-MO-2
2017
charset[i++] = c++;
2018
// 1B130..1B16F; Small Kana Extension
2019
charset[i++] = 0x1b150; // HIRAGANA LETTER SMALL WI
2020
charset[i++] = 0x1b152; // HIRAGANA LETTER SMALL WO
2021
c = 0x1b164; // from KATAKANA LETTER SMALL WI
2022
while (c <= 0x1b167) // ..to KATAKANA LETTER SMALL N
2023
charset[i++] = c++;
2024
// 1B170..1B2FF; Nushu
2025
c = 0x1b170; // from NUSHU CHARACTER-1B170
2026
while (c <= 0x1b2fb) // ..to NUSHU CHARACTER-1B2FB
2027
charset[i++] = c++;
2028
// 1BC00..1BC9F; Duployan
2029
c = 0x1bc00; // from DUPLOYAN LETTER H
2030
while (c <= 0x1bc6a) // ..to DUPLOYAN LETTER VOCALIC M
2031
charset[i++] = c++;
2032
c = 0x1bc70; // from DUPLOYAN AFFIX LEFT HORIZONTAL SECANT
2033
while (c <= 0x1bc7c) // ..to DUPLOYAN AFFIX ATTACHED TANGENT HOOK
2034
charset[i++] = c++;
2035
c = 0x1bc80; // from DUPLOYAN AFFIX HIGH ACUTE
2036
while (c <= 0x1bc88) // ..to DUPLOYAN AFFIX HIGH VERTICAL
2037
charset[i++] = c++;
2038
c = 0x1bc90; // from DUPLOYAN AFFIX LOW ACUTE
2039
while (c <= 0x1bc99) // ..to DUPLOYAN AFFIX LOW ARROW
2040
charset[i++] = c++;
2041
c = 0x1bc9c; // from DUPLOYAN SIGN O WITH CROSS
2042
while (c <= 0x1bc9f) // ..to DUPLOYAN PUNCTUATION CHINOOK FULL STOP
2043
charset[i++] = c++;
2044
// 1BCA0..1BCAF; Shorthand Format Controls
2045
c = 0x1bca0; // from SHORTHAND FORMAT LETTER OVERLAP
2046
while (c <= 0x1bca3) // ..to SHORTHAND FORMAT UP STEP
2047
charset[i++] = c++;
2048
// 1D000..1D0FF; Byzantine Musical Symbols
2049
c = 0x1d000; // from BYZANTINE MUSICAL SYMBOL PSILI
2050
while (c <= 0x1d0f5) // ..to BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
2051
charset[i++] = c++;
2052
// 1D100..1D1FF; Musical Symbols
2053
c = 0x1d100; // from MUSICAL SYMBOL SINGLE BARLINE
2054
while (c <= 0x1d126) // ..to MUSICAL SYMBOL DRUM CLEF-2
2055
charset[i++] = c++;
2056
c = 0x1d129; // from MUSICAL SYMBOL MULTIPLE MEASURE REST
2057
while (c <= 0x1d1e8) // ..to MUSICAL SYMBOL KIEVAN FLAT SIGN
2058
charset[i++] = c++;
2059
// 1D200..1D24F; Ancient Greek Musical Notation
2060
c = 0x1d200; // from GREEK VOCAL NOTATION SYMBOL-1
2061
while (c <= 0x1d245) // ..to GREEK MUSICAL LEIMMA
2062
charset[i++] = c++;
2063
// 1D2E0..1D2FF; Mayan Numerals
2064
c = 0x1d2e0; // from MAYAN NUMERAL ZERO
2065
while (c <= 0x1d2f3) // ..to MAYAN NUMERAL NINETEEN
2066
charset[i++] = c++;
2067
// 1D300..1D35F; Tai Xuan Jing Symbols
2068
c = 0x1d300; // from MONOGRAM FOR EARTH
2069
while (c <= 0x1d356) // ..to TETRAGRAM FOR FOSTERING
2070
charset[i++] = c++;
2071
// 1D360..1D37F; Counting Rod Numerals
2072
c = 0x1d360; // from COUNTING ROD UNIT DIGIT ONE
2073
while (c <= 0x1d378) // ..to TALLY MARK FIVE
2074
charset[i++] = c++;
2075
// 1D400..1D7FF; Mathematical Alphanumeric Symbols
2076
c = 0x1d400; // from MATHEMATICAL BOLD CAPITAL A
2077
while (c <= 0x1d454) // ..to MATHEMATICAL ITALIC SMALL G
2078
charset[i++] = c++;
2079
c = 0x1d456; // from MATHEMATICAL ITALIC SMALL I
2080
while (c <= 0x1d49c) // ..to MATHEMATICAL SCRIPT CAPITAL A
2081
charset[i++] = c++;
2082
charset[i++] = 0x1d49e; // MATHEMATICAL SCRIPT CAPITAL C
2083
charset[i++] = 0x1d49f; // MATHEMATICAL SCRIPT CAPITAL D
2084
charset[i++] = 0x1d4a5; // MATHEMATICAL SCRIPT CAPITAL J
2085
charset[i++] = 0x1d4a6; // MATHEMATICAL SCRIPT CAPITAL K
2086
c = 0x1d4a9; // from MATHEMATICAL SCRIPT CAPITAL N
2087
while (c <= 0x1d4ac) // ..to MATHEMATICAL SCRIPT CAPITAL Q
2088
charset[i++] = c++;
2089
c = 0x1d4ae; // from MATHEMATICAL SCRIPT CAPITAL S
2090
while (c <= 0x1d4b9) // ..to MATHEMATICAL SCRIPT SMALL D
2091
charset[i++] = c++;
2092
c = 0x1d4bd; // from MATHEMATICAL SCRIPT SMALL H
2093
while (c <= 0x1d4c3) // ..to MATHEMATICAL SCRIPT SMALL N
2094
charset[i++] = c++;
2095
c = 0x1d4c5; // from MATHEMATICAL SCRIPT SMALL P
2096
while (c <= 0x1d505) // ..to MATHEMATICAL FRAKTUR CAPITAL B
2097
charset[i++] = c++;
2098
c = 0x1d507; // from MATHEMATICAL FRAKTUR CAPITAL D
2099
while (c <= 0x1d50a) // ..to MATHEMATICAL FRAKTUR CAPITAL G
2100
charset[i++] = c++;
2101
c = 0x1d50d; // from MATHEMATICAL FRAKTUR CAPITAL J
2102
while (c <= 0x1d514) // ..to MATHEMATICAL FRAKTUR CAPITAL Q
2103
charset[i++] = c++;
2104
c = 0x1d516; // from MATHEMATICAL FRAKTUR CAPITAL S
2105
while (c <= 0x1d51c) // ..to MATHEMATICAL FRAKTUR CAPITAL Y
2106
charset[i++] = c++;
2107
c = 0x1d51e; // from MATHEMATICAL FRAKTUR SMALL A
2108
while (c <= 0x1d539) // ..to MATHEMATICAL DOUBLE-STRUCK CAPITAL B
2109
charset[i++] = c++;
2110
c = 0x1d53b; // from MATHEMATICAL DOUBLE-STRUCK CAPITAL D
2111
while (c <= 0x1d53e) // ..to MATHEMATICAL DOUBLE-STRUCK CAPITAL G
2112
charset[i++] = c++;
2113
c = 0x1d540; // from MATHEMATICAL DOUBLE-STRUCK CAPITAL I
2114
while (c <= 0x1d544) // ..to MATHEMATICAL DOUBLE-STRUCK CAPITAL M
2115
charset[i++] = c++;
2116
c = 0x1d54a; // from MATHEMATICAL DOUBLE-STRUCK CAPITAL S
2117
while (c <= 0x1d550) // ..to MATHEMATICAL DOUBLE-STRUCK CAPITAL Y
2118
charset[i++] = c++;
2119
c = 0x1d552; // from MATHEMATICAL DOUBLE-STRUCK SMALL A
2120
while (c <= 0x1d6a5) // ..to MATHEMATICAL ITALIC SMALL DOTLESS J
2121
charset[i++] = c++;
2122
c = 0x1d6a8; // from MATHEMATICAL BOLD CAPITAL ALPHA
2123
while (c <= 0x1d7cb) // ..to MATHEMATICAL BOLD SMALL DIGAMMA
2124
charset[i++] = c++;
2125
c = 0x1d7ce; // from MATHEMATICAL BOLD DIGIT ZERO
2126
while (c <= 0x1d7ff) // ..to MATHEMATICAL MONOSPACE DIGIT NINE
2127
charset[i++] = c++;
2128
// 1D800..1DAAF; Sutton SignWriting
2129
c = 0x1d800; // from SIGNWRITING HAND-FIST INDEX
2130
while (c <= 0x1da8b) // ..to SIGNWRITING PARENTHESIS
2131
charset[i++] = c++;
2132
c = 0x1da9b; // from SIGNWRITING FILL MODIFIER-2
2133
while (c <= 0x1da9f) // ..to SIGNWRITING FILL MODIFIER-6
2134
charset[i++] = c++;
2135
c = 0x1daa1; // from SIGNWRITING ROTATION MODIFIER-2
2136
while (c <= 0x1daaf) // ..to SIGNWRITING ROTATION MODIFIER-16
2137
charset[i++] = c++;
2138
// 1E000..1E02F; Glagolitic Supplement
2139
c = 0x1e000; // from COMBINING GLAGOLITIC LETTER AZU
2140
while (c <= 0x1e006) // ..to COMBINING GLAGOLITIC LETTER ZHIVETE
2141
charset[i++] = c++;
2142
c = 0x1e008; // from COMBINING GLAGOLITIC LETTER ZEMLJA
2143
while (c <= 0x1e018) // ..to COMBINING GLAGOLITIC LETTER HERU
2144
charset[i++] = c++;
2145
c = 0x1e01b; // from COMBINING GLAGOLITIC LETTER SHTA
2146
while (c <= 0x1e021) // ..to COMBINING GLAGOLITIC LETTER YATI
2147
charset[i++] = c++;
2148
charset[i++] = 0x1e023; // COMBINING GLAGOLITIC LETTER YU
2149
charset[i++] = 0x1e024; // COMBINING GLAGOLITIC LETTER SMALL YUS
2150
c = 0x1e026; // from COMBINING GLAGOLITIC LETTER YO
2151
while (c <= 0x1e02a) // ..to COMBINING GLAGOLITIC LETTER FITA
2152
charset[i++] = c++;
2153
// 1E100..1E14F; Nyiakeng Puachue Hmong
2154
c = 0x1e100; // from NYIAKENG PUACHUE HMONG LETTER MA
2155
while (c <= 0x1e12c) // ..to NYIAKENG PUACHUE HMONG LETTER W
2156
charset[i++] = c++;
2157
c = 0x1e130; // from NYIAKENG PUACHUE HMONG TONE-B
2158
while (c <= 0x1e13d) // ..to NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
2159
charset[i++] = c++;
2160
c = 0x1e140; // from NYIAKENG PUACHUE HMONG DIGIT ZERO
2161
while (c <= 0x1e149) // ..to NYIAKENG PUACHUE HMONG DIGIT NINE
2162
charset[i++] = c++;
2163
charset[i++] = 0x1e14e; // NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ
2164
charset[i++] = 0x1e14f; // NYIAKENG PUACHUE HMONG CIRCLED CA
2165
// 1E2C0..1E2FF; Wancho
2166
c = 0x1e2c0; // from WANCHO LETTER AA
2167
while (c <= 0x1e2f9) // ..to WANCHO DIGIT NINE
2168
charset[i++] = c++;
2169
charset[i++] = 0x1e2ff; // WANCHO NGUN SIGN
2170
// 1E800..1E8DF; Mende Kikakui
2171
c = 0x1e800; // from MENDE KIKAKUI SYLLABLE M001 KI
2172
while (c <= 0x1e8c4) // ..to MENDE KIKAKUI SYLLABLE M060 NYON
2173
charset[i++] = c++;
2174
c = 0x1e8c7; // from MENDE KIKAKUI DIGIT ONE
2175
while (c <= 0x1e8d6) // ..to MENDE KIKAKUI COMBINING NUMBER MILLIONS
2176
charset[i++] = c++;
2177
// 1E900..1E95F; Adlam
2178
c = 0x1e900; // from ADLAM CAPITAL LETTER ALIF
2179
while (c <= 0x1e94b) // ..to ADLAM NASALIZATION MARK
2180
charset[i++] = c++;
2181
c = 0x1e950; // from ADLAM DIGIT ZERO
2182
while (c <= 0x1e959) // ..to ADLAM DIGIT NINE
2183
charset[i++] = c++;
2184
charset[i++] = 0x1e95e; // ADLAM INITIAL EXCLAMATION MARK
2185
charset[i++] = 0x1e95f; // ADLAM INITIAL QUESTION MARK
2186
// 1EC70..1ECBF; Indic Siyaq Numbers
2187
c = 0x1ec71; // from INDIC SIYAQ NUMBER ONE
2188
while (c <= 0x1ecb4) // ..to INDIC SIYAQ ALTERNATE LAKH MARK
2189
charset[i++] = c++;
2190
// 1ED00..1ED4F; Ottoman Siyaq Numbers
2191
c = 0x1ed01; // from OTTOMAN SIYAQ NUMBER ONE
2192
while (c <= 0x1ed3d) // ..to OTTOMAN SIYAQ FRACTION ONE SIXTH
2193
charset[i++] = c++;
2194
// 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
2195
c = 0x1ee00; // from ARABIC MATHEMATICAL ALEF
2196
while (c <= 0x1ee03) // ..to ARABIC MATHEMATICAL DAL
2197
charset[i++] = c++;
2198
c = 0x1ee05; // from ARABIC MATHEMATICAL WAW
2199
while (c <= 0x1ee1f) // ..to ARABIC MATHEMATICAL DOTLESS QAF
2200
charset[i++] = c++;
2201
charset[i++] = 0x1ee21; // ARABIC MATHEMATICAL INITIAL BEH
2202
charset[i++] = 0x1ee22; // ARABIC MATHEMATICAL INITIAL JEEM
2203
c = 0x1ee29; // from ARABIC MATHEMATICAL INITIAL YEH
2204
while (c <= 0x1ee32) // ..to ARABIC MATHEMATICAL INITIAL QAF
2205
charset[i++] = c++;
2206
c = 0x1ee34; // from ARABIC MATHEMATICAL INITIAL SHEEN
2207
while (c <= 0x1ee37) // ..to ARABIC MATHEMATICAL INITIAL KHAH
2208
charset[i++] = c++;
2209
charset[i++] = 0x1ee4d; // ARABIC MATHEMATICAL TAILED NOON
2210
charset[i++] = 0x1ee4f; // ARABIC MATHEMATICAL TAILED AIN
2211
charset[i++] = 0x1ee51; // ARABIC MATHEMATICAL TAILED SAD
2212
charset[i++] = 0x1ee52; // ARABIC MATHEMATICAL TAILED QAF
2213
charset[i++] = 0x1ee61; // ARABIC MATHEMATICAL STRETCHED BEH
2214
charset[i++] = 0x1ee62; // ARABIC MATHEMATICAL STRETCHED JEEM
2215
c = 0x1ee67; // from ARABIC MATHEMATICAL STRETCHED HAH
2216
while (c <= 0x1ee6a) // ..to ARABIC MATHEMATICAL STRETCHED KAF
2217
charset[i++] = c++;
2218
c = 0x1ee6c; // from ARABIC MATHEMATICAL STRETCHED MEEM
2219
while (c <= 0x1ee72) // ..to ARABIC MATHEMATICAL STRETCHED QAF
2220
charset[i++] = c++;
2221
c = 0x1ee74; // from ARABIC MATHEMATICAL STRETCHED SHEEN
2222
while (c <= 0x1ee77) // ..to ARABIC MATHEMATICAL STRETCHED KHAH
2223
charset[i++] = c++;
2224
c = 0x1ee79; // from ARABIC MATHEMATICAL STRETCHED DAD
2225
while (c <= 0x1ee7c) // ..to ARABIC MATHEMATICAL STRETCHED DOTLESS BEH
2226
charset[i++] = c++;
2227
c = 0x1ee80; // from ARABIC MATHEMATICAL LOOPED ALEF
2228
while (c <= 0x1ee89) // ..to ARABIC MATHEMATICAL LOOPED YEH
2229
charset[i++] = c++;
2230
c = 0x1ee8b; // from ARABIC MATHEMATICAL LOOPED LAM
2231
while (c <= 0x1ee9b) // ..to ARABIC MATHEMATICAL LOOPED GHAIN
2232
charset[i++] = c++;
2233
charset[i++] = 0x1eea1; // ARABIC MATHEMATICAL DOUBLE-STRUCK BEH
2234
charset[i++] = 0x1eea3; // ARABIC MATHEMATICAL DOUBLE-STRUCK DAL
2235
c = 0x1eea5; // from ARABIC MATHEMATICAL DOUBLE-STRUCK WAW
2236
while (c <= 0x1eea9) // ..to ARABIC MATHEMATICAL DOUBLE-STRUCK YEH
2237
charset[i++] = c++;
2238
c = 0x1eeab; // from ARABIC MATHEMATICAL DOUBLE-STRUCK LAM
2239
while (c <= 0x1eebb) // ..to ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
2240
charset[i++] = c++;
2241
charset[i++] = 0x1eef0; // ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL
2242
charset[i++] = 0x1eef1; // ARABIC MATHEMATICAL OPERATOR HAH WITH DAL
2243
// 1F000..1F02F; Mahjong Tiles
2244
c = 0x1f000; // from MAHJONG TILE EAST WIND
2245
while (c <= 0x1f02b) // ..to MAHJONG TILE BACK
2246
charset[i++] = c++;
2247
// 1F030..1F09F; Domino Tiles
2248
c = 0x1f030; // from DOMINO TILE HORIZONTAL BACK
2249
while (c <= 0x1f093) // ..to DOMINO TILE VERTICAL-06-06
2250
charset[i++] = c++;
2251
// 1F0A0..1F0FF; Playing Cards
2252
c = 0x1f0a0; // from PLAYING CARD BACK
2253
while (c <= 0x1f0ae) // ..to PLAYING CARD KING OF SPADES
2254
charset[i++] = c++;
2255
c = 0x1f0b1; // from PLAYING CARD ACE OF HEARTS
2256
while (c <= 0x1f0bf) // ..to PLAYING CARD RED JOKER
2257
charset[i++] = c++;
2258
c = 0x1f0c1; // from PLAYING CARD ACE OF DIAMONDS
2259
while (c <= 0x1f0cf) // ..to PLAYING CARD BLACK JOKER
2260
charset[i++] = c++;
2261
c = 0x1f0d1; // from PLAYING CARD ACE OF CLUBS
2262
while (c <= 0x1f0f5) // ..to PLAYING CARD TRUMP-21
2263
charset[i++] = c++;
2264
// 1F100..1F1FF; Enclosed Alphanumeric Supplement
2265
c = 0x1f100; // from DIGIT ZERO FULL STOP
2266
while (c <= 0x1f1ad) // ..to MASK WORK SYMBOL
2267
charset[i++] = c++;
2268
c = 0x1f1e6; // from REGIONAL INDICATOR SYMBOL LETTER A
2269
while (c <= 0x1f1ff) // ..to REGIONAL INDICATOR SYMBOL LETTER Z
2270
charset[i++] = c++;
2271
// 1F200..1F2FF; Enclosed Ideographic Supplement
2272
charset[i++] = 0x1f200; // SQUARE HIRAGANA HOKA
2273
charset[i++] = 0x1f202; // SQUARED KATAKANA SA
2274
c = 0x1f210; // from SQUARED CJK UNIFIED IDEOGRAPH-624B
2275
while (c <= 0x1f23b) // ..to SQUARED CJK UNIFIED IDEOGRAPH-914D
2276
charset[i++] = c++;
2277
c = 0x1f240; // from TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C
2278
while (c <= 0x1f248) // ..to TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
2279
charset[i++] = c++;
2280
charset[i++] = 0x1f250; // CIRCLED IDEOGRAPH ADVANTAGE
2281
charset[i++] = 0x1f251; // CIRCLED IDEOGRAPH ACCEPT
2282
c = 0x1f260; // from ROUNDED SYMBOL FOR FU
2283
while (c <= 0x1f265) // ..to ROUNDED SYMBOL FOR CAI
2284
charset[i++] = c++;
2285
// 1F300..1F5FF; Miscellaneous Symbols and Pictographs
2286
c = 0x1f300; // from CYCLONE
2287
while (c <= 0x1f5ff) // ..to MOYAI
2288
charset[i++] = c++;
2289
// 1F600..1F64F; Emoticons
2290
c = 0x1f600; // from GRINNING FACE
2291
while (c <= 0x1f64f) // ..to PERSON WITH FOLDED HANDS
2292
charset[i++] = c++;
2293
// 1F650..1F67F; Ornamental Dingbats
2294
c = 0x1f650; // from NORTH WEST POINTING LEAF
2295
while (c <= 0x1f67f) // ..to REVERSE CHECKER BOARD
2296
charset[i++] = c++;
2297
// 1F680..1F6FF; Transport and Map Symbols
2298
c = 0x1f680; // from ROCKET
2299
while (c <= 0x1f6d7) // ..to ELEVATOR
2300
charset[i++] = c++;
2301
c = 0x1f6e0; // from HAMMER AND WRENCH
2302
while (c <= 0x1f6ec) // ..to AIRPLANE ARRIVING
2303
charset[i++] = c++;
2304
c = 0x1f6f0; // from SATELLITE
2305
while (c <= 0x1f6fc) // ..to ROLLER SKATE
2306
charset[i++] = c++;
2307
// 1F700..1F77F; Alchemical Symbols
2308
c = 0x1f700; // from ALCHEMICAL SYMBOL FOR QUINTESSENCE
2309
while (c <= 0x1f773) // ..to ALCHEMICAL SYMBOL FOR HALF OUNCE
2310
charset[i++] = c++;
2311
// 1F780..1F7FF; Geometric Shapes Extended
2312
c = 0x1f780; // from BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE
2313
while (c <= 0x1f7d8) // ..to NEGATIVE CIRCLED SQUARE
2314
charset[i++] = c++;
2315
c = 0x1f7e0; // from LARGE ORANGE CIRCLE
2316
while (c <= 0x1f7eb) // ..to LARGE BROWN SQUARE
2317
charset[i++] = c++;
2318
// 1F800..1F8FF; Supplemental Arrows-C
2319
c = 0x1f800; // from LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD
2320
while (c <= 0x1f80b) // ..to DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD
2321
charset[i++] = c++;
2322
c = 0x1f810; // from LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD
2323
while (c <= 0x1f847) // ..to DOWNWARDS HEAVY ARROW
2324
charset[i++] = c++;
2325
c = 0x1f850; // from LEFTWARDS SANS-SERIF ARROW
2326
while (c <= 0x1f859) // ..to UP DOWN SANS-SERIF ARROW
2327
charset[i++] = c++;
2328
c = 0x1f860; // from WIDE-HEADED LEFTWARDS LIGHT BARB ARROW
2329
while (c <= 0x1f887) // ..to WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
2330
charset[i++] = c++;
2331
c = 0x1f890; // from LEFTWARDS TRIANGLE ARROWHEAD
2332
while (c <= 0x1f8ad) // ..to WHITE ARROW SHAFT WIDTH TWO THIRDS
2333
charset[i++] = c++;
2334
charset[i++] = 0x1f8b0; // ARROW POINTING UPWARDS THEN NORTH WEST
2335
charset[i++] = 0x1f8b1; // ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST
2336
// 1F900..1F9FF; Supplemental Symbols and Pictographs
2337
c = 0x1f900; // from CIRCLED CROSS FORMEE WITH FOUR DOTS
2338
while (c <= 0x1f978) // ..to DISGUISED FACE
2339
charset[i++] = c++;
2340
c = 0x1f97a; // from FACE WITH PLEADING EYES
2341
while (c <= 0x1f9cb) // ..to BUBBLE TEA
2342
charset[i++] = c++;
2343
c = 0x1f9cd; // from STANDING PERSON
2344
while (c <= 0x1f9ff) // ..to NAZAR AMULET
2345
charset[i++] = c++;
2346
// 1FA00..1FA6F; Chess Symbols
2347
c = 0x1fa00; // from NEUTRAL CHESS KING
2348
while (c <= 0x1fa53) // ..to BLACK CHESS KNIGHT-BISHOP
2349
charset[i++] = c++;
2350
c = 0x1fa60; // from XIANGQI RED GENERAL
2351
while (c <= 0x1fa6d) // ..to XIANGQI BLACK SOLDIER
2352
charset[i++] = c++;
2353
// 1FA70..1FAFF; Symbols and Pictographs Extended-A
2354
c = 0x1fa70; // from BALLET SHOES
2355
while (c <= 0x1fa74) // ..to THONG SANDAL
2356
charset[i++] = c++;
2357
charset[i++] = 0x1fa78; // DROP OF BLOOD
2358
charset[i++] = 0x1fa7a; // STETHOSCOPE
2359
c = 0x1fa80; // from YO-YO
2360
while (c <= 0x1fa86) // ..to NESTING DOLLS
2361
charset[i++] = c++;
2362
c = 0x1fa90; // from RINGED PLANET
2363
while (c <= 0x1faa8) // ..to ROCK
2364
charset[i++] = c++;
2365
c = 0x1fab0; // from FLY
2366
while (c <= 0x1fab6) // ..to FEATHER
2367
charset[i++] = c++;
2368
charset[i++] = 0x1fac0; // ANATOMICAL HEART
2369
charset[i++] = 0x1fac2; // PEOPLE HUGGING
2370
c = 0x1fad0; // from BLUEBERRIES
2371
while (c <= 0x1fad6) // ..to TEAPOT
2372
charset[i++] = c++;
2373
// 1FB00..1FBFF; Symbols for Legacy Computing
2374
c = 0x1fb00; // from BLOCK SEXTANT-1
2375
while (c <= 0x1fb92) // ..to UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
2376
charset[i++] = c++;
2377
c = 0x1fb94; // from LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK
2378
while (c <= 0x1fbca) // ..to WHITE UP-POINTING CHEVRON
2379
charset[i++] = c++;
2380
c = 0x1fbf0; // from SEGMENTED DIGIT ZERO
2381
while (c <= 0x1fbf9) // ..to SEGMENTED DIGIT NINE
2382
charset[i++] = c++;
2383
// 20000..2A6DF; CJK Unified Ideographs Extension B
2384
c = 0x20000; // from <CJK Ideograph Extension B, First>
2385
while (c <= 0x2a6dd) // ..to <CJK Ideograph Extension B, Last>
2386
charset[i++] = c++;
2387
// 2A700..2B73F; CJK Unified Ideographs Extension C
2388
c = 0x2a700; // from <CJK Ideograph Extension C, First>
2389
while (c <= 0x2b734) // ..to <CJK Ideograph Extension C, Last>
2390
charset[i++] = c++;
2391
// 2B740..2B81F; CJK Unified Ideographs Extension D
2392
c = 0x2b740; // from <CJK Ideograph Extension D, First>
2393
while (c <= 0x2b81d) // ..to <CJK Ideograph Extension D, Last>
2394
charset[i++] = c++;
2395
// 2B820..2CEAF; CJK Unified Ideographs Extension E
2396
c = 0x2b820; // from <CJK Ideograph Extension E, First>
2397
while (c <= 0x2cea1) // ..to <CJK Ideograph Extension E, Last>
2398
charset[i++] = c++;
2399
// 2CEB0..2EBEF; CJK Unified Ideographs Extension F
2400
c = 0x2ceb0; // from <CJK Ideograph Extension F, First>
2401
while (c <= 0x2ebe0) // ..to <CJK Ideograph Extension F, Last>
2402
charset[i++] = c++;
2403
// 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2404
c = 0x2f800; // from CJK COMPATIBILITY IDEOGRAPH-2F800
2405
while (c <= 0x2fa1d) // ..to CJK COMPATIBILITY IDEOGRAPH-2FA1D
2406
charset[i++] = c++;
2407
// 30000..3134F; CJK Unified Ideographs Extension G
2408
c = 0x30000; // from <CJK Ideograph Extension G, First>
2409
while (c <= 0x3134a) // ..to <CJK Ideograph Extension G, Last>
2410
charset[i++] = c++;
2411
// E0000..E007F; Tags
2412
c = 0xe0020; // from TAG SPACE
2413
while (c <= 0xe007f) // ..to CANCEL TAG
2414
charset[i++] = c++;
2415
// E0100..E01EF; Variation Selectors Supplement
2416
// F0000..FFFFF; Supplementary Private Use Area-A
2417
// 100000..10FFFF; Supplementary Private Use Area-B
2418
2419
/* Zero-terminate it, and cache the first character */
2420
charset[i] = 0;
2421
c0 = charset[0];
2422
2423
last = minlength - 1;
2424
i = 0;
2425
while (i <= last) {
2426
id[i] = 0;
2427
word[i++] = c0;
2428
}
2429
lastid = -1;
2430
word[i] = 0;
2431
2432
/* We must init word with dummy data, it doesn't get set until filter() */
2433
word = 1;
2434
}
2435
2436
void generate()
2437
{
2438
int i;
2439
2440
/* Handle the typical case specially */
2441
if (word[last] = charset[++lastid]) return;
2442
2443
lastid = 0;
2444
word[i = last] = c0;
2445
while (i--) { // Have a preceding position?
2446
if (word[i] = charset[++id[i]]) return;
2447
id[i] = 0;
2448
word[i] = c0;
2449
}
2450
2451
if (++last < maxlength) { // Next length?
2452
id[last] = lastid = 0;
2453
word[last] = c0;
2454
word[last + 1] = 0;
2455
} else // We're done
2456
word = 0;
2457
}
2458
2459
void restore()
2460
{
2461
int i, c;
2462
2463
/* Calculate the current length and infer the character indices */
2464
last = 0;
2465
while (c = word[last]) {
2466
i = 0; while (charset[i] != c && charset[i]) i++;
2467
if (!charset[i]) i = 0; // Not found
2468
id[last++] = i;
2469
}
2470
lastid = id[--last];
2471
}
2472
2473