CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Sign Up | Sign In
rapid7

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.

GitHub Repository: rapid7/metasploit-framework
Path: blob/master/data/jtr/dumb16.conf
Views: 11765
1
# This software is Copyright (c) 2012-2020 magnum, and it is hereby
2
# released to the general public under the following terms:
3
# Redistribution and use in source and binary forms, with or without
4
# modification, are permitted.
5
#
6
# Generic implementation of "dumb" exhaustive search of Unicode BMP.
7
# Default is to try *all* allocated characters in the BMP of Unicode v13
8
# (there are 55,387 of them). Even if a fast format can exhaust two characters
9
# in 15 minutes, three characters would take 1.5 years...
10
#
11
# Note that these modes will handle --max-len differently than normal: They
12
# will consider number of characters as opposed to number of bytes. This
13
# means you can naturally just use e.g. --max-len=3 for generating all
14
# three-character candidates (which may be up to 9 bytes each).
15
#
16
# Note that the (newer) cracking mode --subsets=full-unicode is way faster than
17
# this external mode, although not as easy to adapt to smaller portions of the
18
# Unicode space. See doc/SUBSETS
19
20
[List.External:Dumb16]
21
// Section-level variables of [List.External:Dumb16], declared outside init().
// maxlength is assigned in init(): req_maxlen when that is non-zero, otherwise 2.
// Per the file header, lengths in this mode count characters, not bytes.
int maxlength; // Maximum password length to try
22
int last; // Last character position, zero-based
23
int lastid; // Character index in the last position
24
int id[0x7f]; // Current character indices for other positions (0x7f = 127 slots)
25
// charset is filled by init() sequentially from index 0 with the code points
// to try; 0x10000 slots allows one per BMP code point (U+0000..U+FFFF).
// NOTE(review): the role of c0 is not visible in this part of the file --
// confirm against the rest of the external mode before relying on it.
int charset[0x10000], c0; // Characters
26
27
void init()
28
{
29
int minlength;
30
int i, c;
31
32
# Trigger UTF-32 handling in External mode
33
utf32 = 1;
34
35
if (req_minlen)
36
minlength = req_minlen;
37
else
38
minlength = 1;
39
if (req_maxlen)
40
maxlength = req_maxlen;
41
else
42
maxlength = 2;
43
44
/*
45
* This defines the character set. This is auto-generated from UnicodeData.txt
46
* and we skip control characters.
47
*/
48
i = 0;
49
// 0000..007F; Basic Latin
50
c = 0x20; // from SPACE
51
while (c <= 0x7e) // ..to TILDE
52
charset[i++] = c++;
53
// 0080..00FF; Latin-1 Supplement
54
c = 0xa0; // from NO-BREAK SPACE
55
while (c <= 0xff) // ..to LATIN SMALL LETTER Y WITH DIAERESIS
56
charset[i++] = c++;
57
// 0100..017F; Latin Extended-A
58
c = 0x100; // from LATIN CAPITAL LETTER A WITH MACRON
59
while (c <= 0x17f) // ..to LATIN SMALL LETTER LONG S
60
charset[i++] = c++;
61
// 0180..024F; Latin Extended-B
62
c = 0x180; // from LATIN SMALL LETTER B WITH STROKE
63
while (c <= 0x24f) // ..to LATIN SMALL LETTER Y WITH STROKE
64
charset[i++] = c++;
65
// 0250..02AF; IPA Extensions
66
c = 0x250; // from LATIN SMALL LETTER TURNED A
67
while (c <= 0x2af) // ..to LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
68
charset[i++] = c++;
69
// 02B0..02FF; Spacing Modifier Letters
70
c = 0x2b0; // from MODIFIER LETTER SMALL H
71
while (c <= 0x2ff) // ..to MODIFIER LETTER LOW LEFT ARROW
72
charset[i++] = c++;
73
// 0300..036F; Combining Diacritical Marks
74
c = 0x300; // from COMBINING GRAVE ACCENT
75
while (c <= 0x36f) // ..to COMBINING LATIN SMALL LETTER X
76
charset[i++] = c++;
77
// 0370..03FF; Greek and Coptic
78
c = 0x370; // from GREEK CAPITAL LETTER HETA
79
while (c <= 0x377) // ..to GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
80
charset[i++] = c++;
81
c = 0x37a; // from GREEK YPOGEGRAMMENI
82
while (c <= 0x37f) // ..to GREEK CAPITAL LETTER YOT
83
charset[i++] = c++;
84
c = 0x384; // from GREEK TONOS
85
while (c <= 0x38a) // ..to GREEK CAPITAL LETTER IOTA WITH TONOS
86
charset[i++] = c++;
87
c = 0x38e; // from GREEK CAPITAL LETTER UPSILON WITH TONOS
88
while (c <= 0x3a1) // ..to GREEK CAPITAL LETTER RHO
89
charset[i++] = c++;
90
c = 0x3a3; // from GREEK CAPITAL LETTER SIGMA
91
while (c <= 0x3ff) // ..to GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
92
charset[i++] = c++;
93
// 0400..04FF; Cyrillic
94
c = 0x400; // from CYRILLIC CAPITAL LETTER IE WITH GRAVE
95
while (c <= 0x4ff) // ..to CYRILLIC SMALL LETTER HA WITH STROKE
96
charset[i++] = c++;
97
// 0500..052F; Cyrillic Supplement
98
c = 0x500; // from CYRILLIC CAPITAL LETTER KOMI DE
99
while (c <= 0x52f) // ..to CYRILLIC SMALL LETTER EL WITH DESCENDER
100
charset[i++] = c++;
101
// 0530..058F; Armenian
102
c = 0x531; // from ARMENIAN CAPITAL LETTER AYB
103
while (c <= 0x556) // ..to ARMENIAN CAPITAL LETTER FEH
104
charset[i++] = c++;
105
c = 0x559; // from ARMENIAN MODIFIER LETTER LEFT HALF RING
106
while (c <= 0x58a) // ..to ARMENIAN HYPHEN
107
charset[i++] = c++;
108
charset[i++] = 0x58d; // RIGHT-FACING ARMENIAN ETERNITY SIGN
109
charset[i++] = 0x58f; // ARMENIAN DRAM SIGN
110
// 0590..05FF; Hebrew
111
c = 0x591; // from HEBREW ACCENT ETNAHTA
112
while (c <= 0x5c7) // ..to HEBREW POINT QAMATS QATAN
113
charset[i++] = c++;
114
c = 0x5d0; // from HEBREW LETTER ALEF
115
while (c <= 0x5ea) // ..to HEBREW LETTER TAV
116
charset[i++] = c++;
117
c = 0x5ef; // from HEBREW YOD TRIANGLE
118
while (c <= 0x5f4) // ..to HEBREW PUNCTUATION GERSHAYIM
119
charset[i++] = c++;
120
// 0600..06FF; Arabic
121
c = 0x600; // from ARABIC NUMBER SIGN
122
while (c <= 0x61c) // ..to ARABIC LETTER MARK
123
charset[i++] = c++;
124
c = 0x61e; // from ARABIC TRIPLE DOT PUNCTUATION MARK
125
while (c <= 0x6ff) // ..to ARABIC LETTER HEH WITH INVERTED V
126
charset[i++] = c++;
127
// 0700..074F; Syriac
128
c = 0x700; // from SYRIAC END OF PARAGRAPH
129
while (c <= 0x70d) // ..to SYRIAC HARKLEAN ASTERISCUS
130
charset[i++] = c++;
131
c = 0x70f; // from SYRIAC ABBREVIATION MARK
132
while (c <= 0x74a) // ..to SYRIAC BARREKH
133
charset[i++] = c++;
134
charset[i++] = 0x74d; // SYRIAC LETTER SOGDIAN ZHAIN
135
charset[i++] = 0x74f; // SYRIAC LETTER SOGDIAN FE
136
// 0750..077F; Arabic Supplement
137
c = 0x750; // from ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW
138
while (c <= 0x77f) // ..to ARABIC LETTER KAF WITH TWO DOTS ABOVE
139
charset[i++] = c++;
140
// 0780..07BF; Thaana
141
c = 0x780; // from THAANA LETTER HAA
142
while (c <= 0x7b1) // ..to THAANA LETTER NAA
143
charset[i++] = c++;
144
// 07C0..07FF; NKo
145
c = 0x7c0; // from NKO DIGIT ZERO
146
while (c <= 0x7fa) // ..to NKO LAJANYALAN
147
charset[i++] = c++;
148
charset[i++] = 0x7fd; // NKO DANTAYALAN
149
charset[i++] = 0x7ff; // NKO TAMAN SIGN
150
// 0800..083F; Samaritan
151
c = 0x800; // from SAMARITAN LETTER ALAF
152
while (c <= 0x82d) // ..to SAMARITAN MARK NEQUDAA
153
charset[i++] = c++;
154
c = 0x830; // from SAMARITAN PUNCTUATION NEQUDAA
155
while (c <= 0x83e) // ..to SAMARITAN PUNCTUATION ANNAAU
156
charset[i++] = c++;
157
// 0840..085F; Mandaic
158
c = 0x840; // from MANDAIC LETTER HALQA
159
while (c <= 0x85b) // ..to MANDAIC GEMINATION MARK
160
charset[i++] = c++;
161
charset[i++] = 0x85e; // MANDAIC PUNCTUATION
162
// 0860..086F; Syriac Supplement
163
c = 0x860; // from SYRIAC LETTER MALAYALAM NGA
164
while (c <= 0x86a) // ..to SYRIAC LETTER MALAYALAM SSA
165
charset[i++] = c++;
166
// 08A0..08FF; Arabic Extended-A
167
c = 0x8a0; // from ARABIC LETTER BEH WITH SMALL V BELOW
168
while (c <= 0x8b4) // ..to ARABIC LETTER KAF WITH DOT BELOW
169
charset[i++] = c++;
170
c = 0x8b6; // from ARABIC LETTER BEH WITH SMALL MEEM ABOVE
171
while (c <= 0x8c7) // ..to ARABIC LETTER LAM WITH SMALL ARABIC LETTER TAH ABOVE
172
charset[i++] = c++;
173
c = 0x8d3; // from ARABIC SMALL LOW WAW
174
while (c <= 0x8ff) // ..to ARABIC MARK SIDEWAYS NOON GHUNNA
175
charset[i++] = c++;
176
// 0900..097F; Devanagari
177
c = 0x900; // from DEVANAGARI SIGN INVERTED CANDRABINDU
178
while (c <= 0x97f) // ..to DEVANAGARI LETTER BBA
179
charset[i++] = c++;
180
// 0980..09FF; Bengali
181
c = 0x980; // from BENGALI ANJI
182
while (c <= 0x983) // ..to BENGALI SIGN VISARGA
183
charset[i++] = c++;
184
c = 0x985; // from BENGALI LETTER A
185
while (c <= 0x98c) // ..to BENGALI LETTER VOCALIC L
186
charset[i++] = c++;
187
charset[i++] = 0x98f; // BENGALI LETTER E
188
charset[i++] = 0x990; // BENGALI LETTER AI
189
c = 0x993; // from BENGALI LETTER O
190
while (c <= 0x9a8) // ..to BENGALI LETTER NA
191
charset[i++] = c++;
192
c = 0x9aa; // from BENGALI LETTER PA
193
while (c <= 0x9b0) // ..to BENGALI LETTER RA
194
charset[i++] = c++;
195
c = 0x9b6; // from BENGALI LETTER SHA
196
while (c <= 0x9b9) // ..to BENGALI LETTER HA
197
charset[i++] = c++;
198
c = 0x9bc; // from BENGALI SIGN NUKTA
199
while (c <= 0x9c4) // ..to BENGALI VOWEL SIGN VOCALIC RR
200
charset[i++] = c++;
201
charset[i++] = 0x9c7; // BENGALI VOWEL SIGN E
202
charset[i++] = 0x9c8; // BENGALI VOWEL SIGN AI
203
c = 0x9cb; // from BENGALI VOWEL SIGN O
204
while (c <= 0x9ce) // ..to BENGALI LETTER KHANDA TA
205
charset[i++] = c++;
206
charset[i++] = 0x9dc; // BENGALI LETTER RRA
207
charset[i++] = 0x9dd; // BENGALI LETTER RHA
208
c = 0x9df; // from BENGALI LETTER YYA
209
while (c <= 0x9e3) // ..to BENGALI VOWEL SIGN VOCALIC LL
210
charset[i++] = c++;
211
c = 0x9e6; // from BENGALI DIGIT ZERO
212
while (c <= 0x9fe) // ..to BENGALI SANDHI MARK
213
charset[i++] = c++;
214
// 0A00..0A7F; Gurmukhi
215
charset[i++] = 0xa01; // GURMUKHI SIGN ADAK BINDI
216
charset[i++] = 0xa03; // GURMUKHI SIGN VISARGA
217
c = 0xa05; // from GURMUKHI LETTER A
218
while (c <= 0xa0a) // ..to GURMUKHI LETTER UU
219
charset[i++] = c++;
220
charset[i++] = 0xa0f; // GURMUKHI LETTER EE
221
charset[i++] = 0xa10; // GURMUKHI LETTER AI
222
c = 0xa13; // from GURMUKHI LETTER OO
223
while (c <= 0xa28) // ..to GURMUKHI LETTER NA
224
charset[i++] = c++;
225
c = 0xa2a; // from GURMUKHI LETTER PA
226
while (c <= 0xa30) // ..to GURMUKHI LETTER RA
227
charset[i++] = c++;
228
charset[i++] = 0xa32; // GURMUKHI LETTER LA
229
charset[i++] = 0xa33; // GURMUKHI LETTER LLA
230
charset[i++] = 0xa35; // GURMUKHI LETTER VA
231
charset[i++] = 0xa36; // GURMUKHI LETTER SHA
232
charset[i++] = 0xa38; // GURMUKHI LETTER SA
233
charset[i++] = 0xa39; // GURMUKHI LETTER HA
234
c = 0xa3e; // from GURMUKHI VOWEL SIGN AA
235
while (c <= 0xa42) // ..to GURMUKHI VOWEL SIGN UU
236
charset[i++] = c++;
237
charset[i++] = 0xa47; // GURMUKHI VOWEL SIGN EE
238
charset[i++] = 0xa48; // GURMUKHI VOWEL SIGN AI
239
charset[i++] = 0xa4b; // GURMUKHI VOWEL SIGN OO
240
charset[i++] = 0xa4d; // GURMUKHI SIGN VIRAMA
241
c = 0xa59; // from GURMUKHI LETTER KHHA
242
while (c <= 0xa5c) // ..to GURMUKHI LETTER RRA
243
charset[i++] = c++;
244
c = 0xa66; // from GURMUKHI DIGIT ZERO
245
while (c <= 0xa76) // ..to GURMUKHI ABBREVIATION SIGN
246
charset[i++] = c++;
247
// 0A80..0AFF; Gujarati
248
charset[i++] = 0xa81; // GUJARATI SIGN CANDRABINDU
249
charset[i++] = 0xa83; // GUJARATI SIGN VISARGA
250
c = 0xa85; // from GUJARATI LETTER A
251
while (c <= 0xa8d) // ..to GUJARATI VOWEL CANDRA E
252
charset[i++] = c++;
253
charset[i++] = 0xa8f; // GUJARATI LETTER E
254
charset[i++] = 0xa91; // GUJARATI VOWEL CANDRA O
255
c = 0xa93; // from GUJARATI LETTER O
256
while (c <= 0xaa8) // ..to GUJARATI LETTER NA
257
charset[i++] = c++;
258
c = 0xaaa; // from GUJARATI LETTER PA
259
while (c <= 0xab0) // ..to GUJARATI LETTER RA
260
charset[i++] = c++;
261
charset[i++] = 0xab2; // GUJARATI LETTER LA
262
charset[i++] = 0xab3; // GUJARATI LETTER LLA
263
c = 0xab5; // from GUJARATI LETTER VA
264
while (c <= 0xab9) // ..to GUJARATI LETTER HA
265
charset[i++] = c++;
266
c = 0xabc; // from GUJARATI SIGN NUKTA
267
while (c <= 0xac5) // ..to GUJARATI VOWEL SIGN CANDRA E
268
charset[i++] = c++;
269
charset[i++] = 0xac7; // GUJARATI VOWEL SIGN E
270
charset[i++] = 0xac9; // GUJARATI VOWEL SIGN CANDRA O
271
charset[i++] = 0xacb; // GUJARATI VOWEL SIGN O
272
charset[i++] = 0xacd; // GUJARATI SIGN VIRAMA
273
c = 0xae0; // from GUJARATI LETTER VOCALIC RR
274
while (c <= 0xae3) // ..to GUJARATI VOWEL SIGN VOCALIC LL
275
charset[i++] = c++;
276
c = 0xae6; // from GUJARATI DIGIT ZERO
277
while (c <= 0xaf1) // ..to GUJARATI RUPEE SIGN
278
charset[i++] = c++;
279
c = 0xaf9; // from GUJARATI LETTER ZHA
280
while (c <= 0xaff) // ..to GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
281
charset[i++] = c++;
282
// 0B00..0B7F; Oriya
283
charset[i++] = 0xb01; // ORIYA SIGN CANDRABINDU
284
charset[i++] = 0xb03; // ORIYA SIGN VISARGA
285
c = 0xb05; // from ORIYA LETTER A
286
while (c <= 0xb0c) // ..to ORIYA LETTER VOCALIC L
287
charset[i++] = c++;
288
charset[i++] = 0xb0f; // ORIYA LETTER E
289
charset[i++] = 0xb10; // ORIYA LETTER AI
290
c = 0xb13; // from ORIYA LETTER O
291
while (c <= 0xb28) // ..to ORIYA LETTER NA
292
charset[i++] = c++;
293
c = 0xb2a; // from ORIYA LETTER PA
294
while (c <= 0xb30) // ..to ORIYA LETTER RA
295
charset[i++] = c++;
296
charset[i++] = 0xb32; // ORIYA LETTER LA
297
charset[i++] = 0xb33; // ORIYA LETTER LLA
298
c = 0xb35; // from ORIYA LETTER VA
299
while (c <= 0xb39) // ..to ORIYA LETTER HA
300
charset[i++] = c++;
301
c = 0xb3c; // from ORIYA SIGN NUKTA
302
while (c <= 0xb44) // ..to ORIYA VOWEL SIGN VOCALIC RR
303
charset[i++] = c++;
304
charset[i++] = 0xb47; // ORIYA VOWEL SIGN E
305
charset[i++] = 0xb48; // ORIYA VOWEL SIGN AI
306
charset[i++] = 0xb4b; // ORIYA VOWEL SIGN O
307
charset[i++] = 0xb4d; // ORIYA SIGN VIRAMA
308
charset[i++] = 0xb55; // ORIYA SIGN OVERLINE
309
charset[i++] = 0xb57; // ORIYA AU LENGTH MARK
310
charset[i++] = 0xb5c; // ORIYA LETTER RRA
311
charset[i++] = 0xb5d; // ORIYA LETTER RHA
312
c = 0xb5f; // from ORIYA LETTER YYA
313
while (c <= 0xb63) // ..to ORIYA VOWEL SIGN VOCALIC LL
314
charset[i++] = c++;
315
c = 0xb66; // from ORIYA DIGIT ZERO
316
while (c <= 0xb77) // ..to ORIYA FRACTION THREE SIXTEENTHS
317
charset[i++] = c++;
318
// 0B80..0BFF; Tamil
319
charset[i++] = 0xb82; // TAMIL SIGN ANUSVARA
320
charset[i++] = 0xb83; // TAMIL SIGN VISARGA
321
c = 0xb85; // from TAMIL LETTER A
322
while (c <= 0xb8a) // ..to TAMIL LETTER UU
323
charset[i++] = c++;
324
charset[i++] = 0xb8e; // TAMIL LETTER E
325
charset[i++] = 0xb90; // TAMIL LETTER AI
326
c = 0xb92; // from TAMIL LETTER O
327
while (c <= 0xb95) // ..to TAMIL LETTER KA
328
charset[i++] = c++;
329
charset[i++] = 0xb99; // TAMIL LETTER NGA
330
charset[i++] = 0xb9a; // TAMIL LETTER CA
331
charset[i++] = 0xb9e; // TAMIL LETTER NYA
332
charset[i++] = 0xb9f; // TAMIL LETTER TTA
333
charset[i++] = 0xba3; // TAMIL LETTER NNA
334
charset[i++] = 0xba4; // TAMIL LETTER TA
335
charset[i++] = 0xba8; // TAMIL LETTER NA
336
charset[i++] = 0xbaa; // TAMIL LETTER PA
337
c = 0xbae; // from TAMIL LETTER MA
338
while (c <= 0xbb9) // ..to TAMIL LETTER HA
339
charset[i++] = c++;
340
c = 0xbbe; // from TAMIL VOWEL SIGN AA
341
while (c <= 0xbc2) // ..to TAMIL VOWEL SIGN UU
342
charset[i++] = c++;
343
charset[i++] = 0xbc6; // TAMIL VOWEL SIGN E
344
charset[i++] = 0xbc8; // TAMIL VOWEL SIGN AI
345
c = 0xbca; // from TAMIL VOWEL SIGN O
346
while (c <= 0xbcd) // ..to TAMIL SIGN VIRAMA
347
charset[i++] = c++;
348
c = 0xbe6; // from TAMIL DIGIT ZERO
349
while (c <= 0xbfa) // ..to TAMIL NUMBER SIGN
350
charset[i++] = c++;
351
// 0C00..0C7F; Telugu
352
c = 0xc00; // from TELUGU SIGN COMBINING CANDRABINDU ABOVE
353
while (c <= 0xc0c) // ..to TELUGU LETTER VOCALIC L
354
charset[i++] = c++;
355
charset[i++] = 0xc0e; // TELUGU LETTER E
356
charset[i++] = 0xc10; // TELUGU LETTER AI
357
c = 0xc12; // from TELUGU LETTER O
358
while (c <= 0xc28) // ..to TELUGU LETTER NA
359
charset[i++] = c++;
360
c = 0xc2a; // from TELUGU LETTER PA
361
while (c <= 0xc39) // ..to TELUGU LETTER HA
362
charset[i++] = c++;
363
c = 0xc3d; // from TELUGU SIGN AVAGRAHA
364
while (c <= 0xc44) // ..to TELUGU VOWEL SIGN VOCALIC RR
365
charset[i++] = c++;
366
charset[i++] = 0xc46; // TELUGU VOWEL SIGN E
367
charset[i++] = 0xc48; // TELUGU VOWEL SIGN AI
368
c = 0xc4a; // from TELUGU VOWEL SIGN O
369
while (c <= 0xc4d) // ..to TELUGU SIGN VIRAMA
370
charset[i++] = c++;
371
charset[i++] = 0xc55; // TELUGU LENGTH MARK
372
charset[i++] = 0xc56; // TELUGU AI LENGTH MARK
373
charset[i++] = 0xc58; // TELUGU LETTER TSA
374
charset[i++] = 0xc5a; // TELUGU LETTER RRRA
375
c = 0xc60; // from TELUGU LETTER VOCALIC RR
376
while (c <= 0xc63) // ..to TELUGU VOWEL SIGN VOCALIC LL
377
charset[i++] = c++;
378
c = 0xc66; // from TELUGU DIGIT ZERO
379
while (c <= 0xc6f) // ..to TELUGU DIGIT NINE
380
charset[i++] = c++;
381
c = 0xc77; // from TELUGU SIGN SIDDHAM
382
while (c <= 0xc7f) // ..to TELUGU SIGN TUUMU
383
charset[i++] = c++;
384
// 0C80..0CFF; Kannada
385
c = 0xc80; // from KANNADA SIGN SPACING CANDRABINDU
386
while (c <= 0xc8c) // ..to KANNADA LETTER VOCALIC L
387
charset[i++] = c++;
388
charset[i++] = 0xc8e; // KANNADA LETTER E
389
charset[i++] = 0xc90; // KANNADA LETTER AI
390
c = 0xc92; // from KANNADA LETTER O
391
while (c <= 0xca8) // ..to KANNADA LETTER NA
392
charset[i++] = c++;
393
c = 0xcaa; // from KANNADA LETTER PA
394
while (c <= 0xcb3) // ..to KANNADA LETTER LLA
395
charset[i++] = c++;
396
c = 0xcb5; // from KANNADA LETTER VA
397
while (c <= 0xcb9) // ..to KANNADA LETTER HA
398
charset[i++] = c++;
399
c = 0xcbc; // from KANNADA SIGN NUKTA
400
while (c <= 0xcc4) // ..to KANNADA VOWEL SIGN VOCALIC RR
401
charset[i++] = c++;
402
charset[i++] = 0xcc6; // KANNADA VOWEL SIGN E
403
charset[i++] = 0xcc8; // KANNADA VOWEL SIGN AI
404
c = 0xcca; // from KANNADA VOWEL SIGN O
405
while (c <= 0xccd) // ..to KANNADA SIGN VIRAMA
406
charset[i++] = c++;
407
charset[i++] = 0xcd5; // KANNADA LENGTH MARK
408
charset[i++] = 0xcd6; // KANNADA AI LENGTH MARK
409
c = 0xce0; // from KANNADA LETTER VOCALIC RR
410
while (c <= 0xce3) // ..to KANNADA VOWEL SIGN VOCALIC LL
411
charset[i++] = c++;
412
c = 0xce6; // from KANNADA DIGIT ZERO
413
while (c <= 0xcef) // ..to KANNADA DIGIT NINE
414
charset[i++] = c++;
415
charset[i++] = 0xcf1; // KANNADA SIGN JIHVAMULIYA
416
charset[i++] = 0xcf2; // KANNADA SIGN UPADHMANIYA
417
// 0D00..0D7F; Malayalam
418
c = 0xd00; // from MALAYALAM SIGN COMBINING ANUSVARA ABOVE
419
while (c <= 0xd0c) // ..to MALAYALAM LETTER VOCALIC L
420
charset[i++] = c++;
421
charset[i++] = 0xd0e; // MALAYALAM LETTER E
422
charset[i++] = 0xd10; // MALAYALAM LETTER AI
423
c = 0xd12; // from MALAYALAM LETTER O
424
while (c <= 0xd44) // ..to MALAYALAM VOWEL SIGN VOCALIC RR
425
charset[i++] = c++;
426
charset[i++] = 0xd46; // MALAYALAM VOWEL SIGN E
427
charset[i++] = 0xd48; // MALAYALAM VOWEL SIGN AI
428
c = 0xd4a; // from MALAYALAM VOWEL SIGN O
429
while (c <= 0xd4f) // ..to MALAYALAM SIGN PARA
430
charset[i++] = c++;
431
c = 0xd54; // from MALAYALAM LETTER CHILLU M
432
while (c <= 0xd63) // ..to MALAYALAM VOWEL SIGN VOCALIC LL
433
charset[i++] = c++;
434
c = 0xd66; // from MALAYALAM DIGIT ZERO
435
while (c <= 0xd7f) // ..to MALAYALAM LETTER CHILLU K
436
charset[i++] = c++;
437
// 0D80..0DFF; Sinhala
438
charset[i++] = 0xd81; // SINHALA SIGN CANDRABINDU
439
charset[i++] = 0xd83; // SINHALA SIGN VISARGAYA
440
c = 0xd85; // from SINHALA LETTER AYANNA
441
while (c <= 0xd96) // ..to SINHALA LETTER AUYANNA
442
charset[i++] = c++;
443
c = 0xd9a; // from SINHALA LETTER ALPAPRAANA KAYANNA
444
while (c <= 0xdb1) // ..to SINHALA LETTER DANTAJA NAYANNA
445
charset[i++] = c++;
446
c = 0xdb3; // from SINHALA LETTER SANYAKA DAYANNA
447
while (c <= 0xdbb) // ..to SINHALA LETTER RAYANNA
448
charset[i++] = c++;
449
c = 0xdc0; // from SINHALA LETTER VAYANNA
450
while (c <= 0xdc6) // ..to SINHALA LETTER FAYANNA
451
charset[i++] = c++;
452
c = 0xdcf; // from SINHALA VOWEL SIGN AELA-PILLA
453
while (c <= 0xdd4) // ..to SINHALA VOWEL SIGN KETTI PAA-PILLA
454
charset[i++] = c++;
455
c = 0xdd8; // from SINHALA VOWEL SIGN GAETTA-PILLA
456
while (c <= 0xddf) // ..to SINHALA VOWEL SIGN GAYANUKITTA
457
charset[i++] = c++;
458
c = 0xde6; // from SINHALA LITH DIGIT ZERO
459
while (c <= 0xdef) // ..to SINHALA LITH DIGIT NINE
460
charset[i++] = c++;
461
charset[i++] = 0xdf2; // SINHALA VOWEL SIGN DIGA GAETTA-PILLA
462
charset[i++] = 0xdf4; // SINHALA PUNCTUATION KUNDDALIYA
463
// 0E00..0E7F; Thai
464
c = 0xe01; // from THAI CHARACTER KO KAI
465
while (c <= 0xe3a) // ..to THAI CHARACTER PHINTHU
466
charset[i++] = c++;
467
c = 0xe3f; // from THAI CURRENCY SYMBOL BAHT
468
while (c <= 0xe5b) // ..to THAI CHARACTER KHOMUT
469
charset[i++] = c++;
470
// 0E80..0EFF; Lao
471
charset[i++] = 0xe81; // LAO LETTER KO
472
charset[i++] = 0xe82; // LAO LETTER KHO SUNG
473
c = 0xe86; // from LAO LETTER PALI GHA
474
while (c <= 0xe8a) // ..to LAO LETTER SO TAM
475
charset[i++] = c++;
476
c = 0xe8c; // from LAO LETTER PALI JHA
477
while (c <= 0xea3) // ..to LAO LETTER LO LING
478
charset[i++] = c++;
479
c = 0xea7; // from LAO LETTER WO
480
while (c <= 0xebd) // ..to LAO SEMIVOWEL SIGN NYO
481
charset[i++] = c++;
482
c = 0xec0; // from LAO VOWEL SIGN E
483
while (c <= 0xec4) // ..to LAO VOWEL SIGN AI
484
charset[i++] = c++;
485
c = 0xec8; // from LAO TONE MAI EK
486
while (c <= 0xecd) // ..to LAO NIGGAHITA
487
charset[i++] = c++;
488
c = 0xed0; // from LAO DIGIT ZERO
489
while (c <= 0xed9) // ..to LAO DIGIT NINE
490
charset[i++] = c++;
491
c = 0xedc; // from LAO HO NO
492
while (c <= 0xedf) // ..to LAO LETTER KHMU NYO
493
charset[i++] = c++;
494
// 0F00..0FFF; Tibetan
495
c = 0xf00; // from TIBETAN SYLLABLE OM
496
while (c <= 0xf47) // ..to TIBETAN LETTER JA
497
charset[i++] = c++;
498
c = 0xf49; // from TIBETAN LETTER NYA
499
while (c <= 0xf6c) // ..to TIBETAN LETTER RRA
500
charset[i++] = c++;
501
c = 0xf71; // from TIBETAN VOWEL SIGN AA
502
while (c <= 0xf97) // ..to TIBETAN SUBJOINED LETTER JA
503
charset[i++] = c++;
504
c = 0xf99; // from TIBETAN SUBJOINED LETTER NYA
505
while (c <= 0xfbc) // ..to TIBETAN SUBJOINED LETTER FIXED-FORM RA
506
charset[i++] = c++;
507
c = 0xfbe; // from TIBETAN KU RU KHA
508
while (c <= 0xfcc) // ..to TIBETAN SYMBOL NOR BU BZHI -KHYIL
509
charset[i++] = c++;
510
c = 0xfce; // from TIBETAN SIGN RDEL NAG RDEL DKAR
511
while (c <= 0xfda) // ..to TIBETAN MARK TRAILING MCHAN RTAGS
512
charset[i++] = c++;
513
// 1000..109F; Myanmar
514
c = 0x1000; // from MYANMAR LETTER KA
515
while (c <= 0x109f) // ..to MYANMAR SYMBOL SHAN EXCLAMATION
516
charset[i++] = c++;
517
// 10A0..10FF; Georgian
518
c = 0x10a0; // from GEORGIAN CAPITAL LETTER AN
519
while (c <= 0x10c5) // ..to GEORGIAN CAPITAL LETTER HOE
520
charset[i++] = c++;
521
c = 0x10d0; // from GEORGIAN LETTER AN
522
while (c <= 0x10ff) // ..to GEORGIAN LETTER LABIAL SIGN
523
charset[i++] = c++;
524
// 1100..11FF; Hangul Jamo
525
c = 0x1100; // from HANGUL CHOSEONG KIYEOK
526
while (c <= 0x11ff) // ..to HANGUL JONGSEONG SSANGNIEUN
527
charset[i++] = c++;
528
// 1200..137F; Ethiopic
529
c = 0x1200; // from ETHIOPIC SYLLABLE HA
530
while (c <= 0x1248) // ..to ETHIOPIC SYLLABLE QWA
531
charset[i++] = c++;
532
c = 0x124a; // from ETHIOPIC SYLLABLE QWI
533
while (c <= 0x124d) // ..to ETHIOPIC SYLLABLE QWE
534
charset[i++] = c++;
535
c = 0x1250; // from ETHIOPIC SYLLABLE QHA
536
while (c <= 0x1256) // ..to ETHIOPIC SYLLABLE QHO
537
charset[i++] = c++;
538
c = 0x125a; // from ETHIOPIC SYLLABLE QHWI
539
while (c <= 0x125d) // ..to ETHIOPIC SYLLABLE QHWE
540
charset[i++] = c++;
541
c = 0x1260; // from ETHIOPIC SYLLABLE BA
542
while (c <= 0x1288) // ..to ETHIOPIC SYLLABLE XWA
543
charset[i++] = c++;
544
c = 0x128a; // from ETHIOPIC SYLLABLE XWI
545
while (c <= 0x128d) // ..to ETHIOPIC SYLLABLE XWE
546
charset[i++] = c++;
547
c = 0x1290; // from ETHIOPIC SYLLABLE NA
548
while (c <= 0x12b0) // ..to ETHIOPIC SYLLABLE KWA
549
charset[i++] = c++;
550
c = 0x12b2; // from ETHIOPIC SYLLABLE KWI
551
while (c <= 0x12b5) // ..to ETHIOPIC SYLLABLE KWE
552
charset[i++] = c++;
553
c = 0x12b8; // from ETHIOPIC SYLLABLE KXA
554
while (c <= 0x12be) // ..to ETHIOPIC SYLLABLE KXO
555
charset[i++] = c++;
556
c = 0x12c2; // from ETHIOPIC SYLLABLE KXWI
557
while (c <= 0x12c5) // ..to ETHIOPIC SYLLABLE KXWE
558
charset[i++] = c++;
559
c = 0x12c8; // from ETHIOPIC SYLLABLE WA
560
while (c <= 0x12d6) // ..to ETHIOPIC SYLLABLE PHARYNGEAL O
561
charset[i++] = c++;
562
c = 0x12d8; // from ETHIOPIC SYLLABLE ZA
563
while (c <= 0x1310) // ..to ETHIOPIC SYLLABLE GWA
564
charset[i++] = c++;
565
c = 0x1312; // from ETHIOPIC SYLLABLE GWI
566
while (c <= 0x1315) // ..to ETHIOPIC SYLLABLE GWE
567
charset[i++] = c++;
568
c = 0x1318; // from ETHIOPIC SYLLABLE GGA
569
while (c <= 0x135a) // ..to ETHIOPIC SYLLABLE FYA
570
charset[i++] = c++;
571
c = 0x135d; // from ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK
572
while (c <= 0x137c) // ..to ETHIOPIC NUMBER TEN THOUSAND
573
charset[i++] = c++;
574
// 1380..139F; Ethiopic Supplement
575
c = 0x1380; // from ETHIOPIC SYLLABLE SEBATBEIT MWA
576
while (c <= 0x1399) // ..to ETHIOPIC TONAL MARK KURT
577
charset[i++] = c++;
578
// 13A0..13FF; Cherokee
579
c = 0x13a0; // from CHEROKEE LETTER A
580
while (c <= 0x13f5) // ..to CHEROKEE LETTER MV
581
charset[i++] = c++;
582
c = 0x13f8; // from CHEROKEE SMALL LETTER YE
583
while (c <= 0x13fd) // ..to CHEROKEE SMALL LETTER MV
584
charset[i++] = c++;
585
// 1400..167F; Unified Canadian Aboriginal Syllabics
586
c = 0x1400; // from CANADIAN SYLLABICS HYPHEN
587
while (c <= 0x167f) // ..to CANADIAN SYLLABICS BLACKFOOT W
588
charset[i++] = c++;
589
// 1680..169F; Ogham
590
c = 0x1680; // from OGHAM SPACE MARK
591
while (c <= 0x169c) // ..to OGHAM REVERSED FEATHER MARK
592
charset[i++] = c++;
593
// 16A0..16FF; Runic
594
c = 0x16a0; // from RUNIC LETTER FEHU FEOH FE F
595
while (c <= 0x16f8) // ..to RUNIC LETTER FRANKS CASKET AESC
596
charset[i++] = c++;
597
// 1700..171F; Tagalog
598
c = 0x1700; // from TAGALOG LETTER A
599
while (c <= 0x170c) // ..to TAGALOG LETTER YA
600
charset[i++] = c++;
601
c = 0x170e; // from TAGALOG LETTER LA
602
while (c <= 0x1714) // ..to TAGALOG SIGN VIRAMA
603
charset[i++] = c++;
604
// 1720..173F; Hanunoo
605
c = 0x1720; // from HANUNOO LETTER A
606
while (c <= 0x1736) // ..to PHILIPPINE DOUBLE PUNCTUATION
607
charset[i++] = c++;
608
// 1740..175F; Buhid
609
c = 0x1740; // from BUHID LETTER A
610
while (c <= 0x1753) // ..to BUHID VOWEL SIGN U
611
charset[i++] = c++;
612
// 1760..177F; Tagbanwa
613
c = 0x1760; // from TAGBANWA LETTER A
614
while (c <= 0x176c) // ..to TAGBANWA LETTER YA
615
charset[i++] = c++;
616
charset[i++] = 0x176e; // TAGBANWA LETTER LA
617
charset[i++] = 0x1770; // TAGBANWA LETTER SA
618
charset[i++] = 0x1772; // TAGBANWA VOWEL SIGN I
619
charset[i++] = 0x1773; // TAGBANWA VOWEL SIGN U
620
// 1780..17FF; Khmer
621
c = 0x1780; // from KHMER LETTER KA
622
while (c <= 0x17dd) // ..to KHMER SIGN ATTHACAN
623
charset[i++] = c++;
624
c = 0x17e0; // from KHMER DIGIT ZERO
625
while (c <= 0x17e9) // ..to KHMER DIGIT NINE
626
charset[i++] = c++;
627
c = 0x17f0; // from KHMER SYMBOL LEK ATTAK SON
628
while (c <= 0x17f9) // ..to KHMER SYMBOL LEK ATTAK PRAM-BUON
629
charset[i++] = c++;
630
// 1800..18AF; Mongolian
631
c = 0x1800; // from MONGOLIAN BIRGA
632
while (c <= 0x180e) // ..to MONGOLIAN VOWEL SEPARATOR
633
charset[i++] = c++;
634
c = 0x1810; // from MONGOLIAN DIGIT ZERO
635
while (c <= 0x1819) // ..to MONGOLIAN DIGIT NINE
636
charset[i++] = c++;
637
c = 0x1820; // from MONGOLIAN LETTER A
638
while (c <= 0x1878) // ..to MONGOLIAN LETTER CHA WITH TWO DOTS
639
charset[i++] = c++;
640
c = 0x1880; // from MONGOLIAN LETTER ALI GALI ANUSVARA ONE
641
while (c <= 0x18aa) // ..to MONGOLIAN LETTER MANCHU ALI GALI LHA
642
charset[i++] = c++;
643
// 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
644
c = 0x18b0; // from CANADIAN SYLLABICS OY
645
while (c <= 0x18f5) // ..to CANADIAN SYLLABICS CARRIER DENTAL S
646
charset[i++] = c++;
647
// 1900..194F; Limbu
648
c = 0x1900; // from LIMBU VOWEL-CARRIER LETTER
649
while (c <= 0x191e) // ..to LIMBU LETTER TRA
650
charset[i++] = c++;
651
c = 0x1920; // from LIMBU VOWEL SIGN A
652
while (c <= 0x192b) // ..to LIMBU SUBJOINED LETTER WA
653
charset[i++] = c++;
654
c = 0x1930; // from LIMBU SMALL LETTER KA
655
while (c <= 0x193b) // ..to LIMBU SIGN SA-I
656
charset[i++] = c++;
657
c = 0x1944; // from LIMBU EXCLAMATION MARK
658
while (c <= 0x194f) // ..to LIMBU DIGIT NINE
659
charset[i++] = c++;
660
// 1950..197F; Tai Le
661
c = 0x1950; // from TAI LE LETTER KA
662
while (c <= 0x196d) // ..to TAI LE LETTER AI
663
charset[i++] = c++;
664
c = 0x1970; // from TAI LE LETTER TONE-2
665
while (c <= 0x1974) // ..to TAI LE LETTER TONE-6
666
charset[i++] = c++;
667
// 1980..19DF; New Tai Lue
668
c = 0x1980; // from NEW TAI LUE LETTER HIGH QA
669
while (c <= 0x19ab) // ..to NEW TAI LUE LETTER LOW SUA
670
charset[i++] = c++;
671
c = 0x19b0; // from NEW TAI LUE VOWEL SIGN VOWEL SHORTENER
672
while (c <= 0x19c9) // ..to NEW TAI LUE TONE MARK-2
673
charset[i++] = c++;
674
c = 0x19d0; // from NEW TAI LUE DIGIT ZERO
675
while (c <= 0x19da) // ..to NEW TAI LUE THAM DIGIT ONE
676
charset[i++] = c++;
677
charset[i++] = 0x19de; // NEW TAI LUE SIGN LAE
678
charset[i++] = 0x19df; // NEW TAI LUE SIGN LAEV
679
// 19E0..19FF; Khmer Symbols
680
c = 0x19e0; // from KHMER SYMBOL PATHAMASAT
681
while (c <= 0x19ff) // ..to KHMER SYMBOL DAP-PRAM ROC
682
charset[i++] = c++;
683
// 1A00..1A1F; Buginese
684
c = 0x1a00; // from BUGINESE LETTER KA
685
while (c <= 0x1a1b) // ..to BUGINESE VOWEL SIGN AE
686
charset[i++] = c++;
687
charset[i++] = 0x1a1e; // BUGINESE PALLAWA
688
charset[i++] = 0x1a1f; // BUGINESE END OF SECTION
689
// 1A20..1AAF; Tai Tham
690
c = 0x1a20; // from TAI THAM LETTER HIGH KA
691
while (c <= 0x1a5e) // ..to TAI THAM CONSONANT SIGN SA
692
charset[i++] = c++;
693
c = 0x1a60; // from TAI THAM SIGN SAKOT
694
while (c <= 0x1a7c) // ..to TAI THAM SIGN KHUEN-LUE KARAN
695
charset[i++] = c++;
696
c = 0x1a7f; // from TAI THAM COMBINING CRYPTOGRAMMIC DOT
697
while (c <= 0x1a89) // ..to TAI THAM HORA DIGIT NINE
698
charset[i++] = c++;
699
c = 0x1a90; // from TAI THAM THAM DIGIT ZERO
700
while (c <= 0x1a99) // ..to TAI THAM THAM DIGIT NINE
701
charset[i++] = c++;
702
c = 0x1aa0; // from TAI THAM SIGN WIANG
703
while (c <= 0x1aad) // ..to TAI THAM SIGN CAANG
704
charset[i++] = c++;
705
// 1AB0..1AFF; Combining Diacritical Marks Extended
706
c = 0x1ab0; // from COMBINING DOUBLED CIRCUMFLEX ACCENT
707
while (c <= 0x1ac0) // ..to COMBINING LATIN SMALL LETTER TURNED W BELOW
708
charset[i++] = c++;
709
// 1B00..1B7F; Balinese
710
c = 0x1b00; // from BALINESE SIGN ULU RICEM
711
while (c <= 0x1b4b) // ..to BALINESE LETTER ASYURA SASAK
712
charset[i++] = c++;
713
c = 0x1b50; // from BALINESE DIGIT ZERO
714
while (c <= 0x1b7c) // ..to BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING
715
charset[i++] = c++;
716
// 1B80..1BBF; Sundanese
717
c = 0x1b80; // from SUNDANESE SIGN PANYECEK
718
while (c <= 0x1bbf) // ..to SUNDANESE LETTER FINAL M
719
charset[i++] = c++;
720
// 1BC0..1BFF; Batak
721
c = 0x1bc0; // from BATAK LETTER A
722
while (c <= 0x1bf3) // ..to BATAK PANONGONAN
723
charset[i++] = c++;
724
c = 0x1bfc; // from BATAK SYMBOL BINDU NA METEK
725
while (c <= 0x1bff) // ..to BATAK SYMBOL BINDU PANGOLAT
726
charset[i++] = c++;
727
// 1C00..1C4F; Lepcha
728
c = 0x1c00; // from LEPCHA LETTER KA
729
while (c <= 0x1c37) // ..to LEPCHA SIGN NUKTA
730
charset[i++] = c++;
731
c = 0x1c3b; // from LEPCHA PUNCTUATION TA-ROL
732
while (c <= 0x1c49) // ..to LEPCHA DIGIT NINE
733
charset[i++] = c++;
734
charset[i++] = 0x1c4d; // LEPCHA LETTER TTA
735
charset[i++] = 0x1c4f; // LEPCHA LETTER DDA
736
// 1C50..1C7F; Ol Chiki
737
c = 0x1c50; // from OL CHIKI DIGIT ZERO
738
while (c <= 0x1c7f) // ..to OL CHIKI PUNCTUATION DOUBLE MUCAAD
739
charset[i++] = c++;
740
// 1C80..1C8F; Cyrillic Extended-C
741
c = 0x1c80; // from CYRILLIC SMALL LETTER ROUNDED VE
742
while (c <= 0x1c88) // ..to CYRILLIC SMALL LETTER UNBLENDED UK
743
charset[i++] = c++;
744
// 1C90..1CBF; Georgian Extended
745
c = 0x1c90; // from GEORGIAN MTAVRULI CAPITAL LETTER AN
746
while (c <= 0x1cba) // ..to GEORGIAN MTAVRULI CAPITAL LETTER AIN
747
charset[i++] = c++;
748
charset[i++] = 0x1cbd; // GEORGIAN MTAVRULI CAPITAL LETTER AEN
749
charset[i++] = 0x1cbf; // GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
750
// 1CC0..1CCF; Sundanese Supplement
751
c = 0x1cc0; // from SUNDANESE PUNCTUATION BINDU SURYA
752
while (c <= 0x1cc7) // ..to SUNDANESE PUNCTUATION BINDU BA SATANGA
753
charset[i++] = c++;
754
// 1CD0..1CFF; Vedic Extensions
755
c = 0x1cd0; // from VEDIC TONE KARSHANA
756
while (c <= 0x1cfa) // ..to VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA
757
charset[i++] = c++;
758
// 1D00..1D7F; Phonetic Extensions
759
c = 0x1d00; // from LATIN LETTER SMALL CAPITAL A
760
while (c <= 0x1d7f) // ..to LATIN SMALL LETTER UPSILON WITH STROKE
761
charset[i++] = c++;
762
// 1D80..1DBF; Phonetic Extensions Supplement
763
c = 0x1d80; // from LATIN SMALL LETTER B WITH PALATAL HOOK
764
while (c <= 0x1dbf) // ..to MODIFIER LETTER SMALL THETA
765
charset[i++] = c++;
766
// 1DC0..1DFF; Combining Diacritical Marks Supplement
767
c = 0x1dc0; // from COMBINING DOTTED GRAVE ACCENT
768
while (c <= 0x1df9) // ..to COMBINING WIDE INVERTED BRIDGE BELOW
769
charset[i++] = c++;
770
c = 0x1dfb; // from COMBINING DELETION MARK
771
while (c <= 0x1dff) // ..to COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
772
charset[i++] = c++;
773
// 1E00..1EFF; Latin Extended Additional
774
c = 0x1e00; // from LATIN CAPITAL LETTER A WITH RING BELOW
775
while (c <= 0x1eff) // ..to LATIN SMALL LETTER Y WITH LOOP
776
charset[i++] = c++;
777
// 1F00..1FFF; Greek Extended
778
c = 0x1f00; // from GREEK SMALL LETTER ALPHA WITH PSILI
779
while (c <= 0x1f15) // ..to GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
780
charset[i++] = c++;
781
c = 0x1f18; // from GREEK CAPITAL LETTER EPSILON WITH PSILI
782
while (c <= 0x1f1d) // ..to GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
783
charset[i++] = c++;
784
c = 0x1f20; // from GREEK SMALL LETTER ETA WITH PSILI
785
while (c <= 0x1f45) // ..to GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
786
charset[i++] = c++;
787
c = 0x1f48; // from GREEK CAPITAL LETTER OMICRON WITH PSILI
788
while (c <= 0x1f4d) // ..to GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
789
charset[i++] = c++;
790
c = 0x1f50; // from GREEK SMALL LETTER UPSILON WITH PSILI
791
while (c <= 0x1f57) // ..to GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI
792
charset[i++] = c++;
793
c = 0x1f5f; // from GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
794
while (c <= 0x1f7d) // ..to GREEK SMALL LETTER OMEGA WITH OXIA
795
charset[i++] = c++;
796
c = 0x1f80; // from GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
797
while (c <= 0x1fb4) // ..to GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
798
charset[i++] = c++;
799
c = 0x1fb6; // from GREEK SMALL LETTER ALPHA WITH PERISPOMENI
800
while (c <= 0x1fc4) // ..to GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
801
charset[i++] = c++;
802
c = 0x1fc6; // from GREEK SMALL LETTER ETA WITH PERISPOMENI
803
while (c <= 0x1fd3) // ..to GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
804
charset[i++] = c++;
805
c = 0x1fd6; // from GREEK SMALL LETTER IOTA WITH PERISPOMENI
806
while (c <= 0x1fdb) // ..to GREEK CAPITAL LETTER IOTA WITH OXIA
807
charset[i++] = c++;
808
c = 0x1fdd; // from GREEK DASIA AND VARIA
809
while (c <= 0x1fef) // ..to GREEK VARIA
810
charset[i++] = c++;
811
charset[i++] = 0x1ff2; // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
812
charset[i++] = 0x1ff4; // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
813
c = 0x1ff6; // from GREEK SMALL LETTER OMEGA WITH PERISPOMENI
814
while (c <= 0x1ffe) // ..to GREEK DASIA
815
charset[i++] = c++;
816
// 2000..206F; General Punctuation
817
c = 0x2000; // from EN QUAD
818
while (c <= 0x2064) // ..to INVISIBLE PLUS
819
charset[i++] = c++;
820
c = 0x2066; // from LEFT-TO-RIGHT ISOLATE
821
while (c <= 0x206f) // ..to NOMINAL DIGIT SHAPES
822
charset[i++] = c++;
823
// 2070..209F; Superscripts and Subscripts
824
charset[i++] = 0x2070; // SUPERSCRIPT ZERO
825
charset[i++] = 0x2071; // SUPERSCRIPT LATIN SMALL LETTER I
826
c = 0x2074; // from SUPERSCRIPT FOUR
827
while (c <= 0x208e) // ..to SUBSCRIPT RIGHT PARENTHESIS
828
charset[i++] = c++;
829
c = 0x2090; // from LATIN SUBSCRIPT SMALL LETTER A
830
while (c <= 0x209c) // ..to LATIN SUBSCRIPT SMALL LETTER T
831
charset[i++] = c++;
832
// 20A0..20CF; Currency Symbols
833
c = 0x20a0; // from EURO-CURRENCY SIGN
834
while (c <= 0x20bf) // ..to BITCOIN SIGN
835
charset[i++] = c++;
836
// 20D0..20FF; Combining Diacritical Marks for Symbols
837
c = 0x20d0; // from COMBINING LEFT HARPOON ABOVE
838
while (c <= 0x20f0) // ..to COMBINING ASTERISK ABOVE
839
charset[i++] = c++;
840
// 2100..214F; Letterlike Symbols
841
c = 0x2100; // from ACCOUNT OF
842
while (c <= 0x214f) // ..to SYMBOL FOR SAMARITAN SOURCE
843
charset[i++] = c++;
844
// 2150..218F; Number Forms
845
c = 0x2150; // from VULGAR FRACTION ONE SEVENTH
846
while (c <= 0x218b) // ..to TURNED DIGIT THREE
847
charset[i++] = c++;
848
// 2190..21FF; Arrows
849
c = 0x2190; // from LEFTWARDS ARROW
850
while (c <= 0x21ff) // ..to LEFT RIGHT OPEN-HEADED ARROW
851
charset[i++] = c++;
852
// 2200..22FF; Mathematical Operators
853
c = 0x2200; // from FOR ALL
854
while (c <= 0x22ff) // ..to Z NOTATION BAG MEMBERSHIP
855
charset[i++] = c++;
856
// 2300..23FF; Miscellaneous Technical
857
c = 0x2300; // from DIAMETER SIGN
858
while (c <= 0x23ff) // ..to OBSERVER EYE SYMBOL
859
charset[i++] = c++;
860
// 2400..243F; Control Pictures
861
c = 0x2400; // from SYMBOL FOR NULL
862
while (c <= 0x2426) // ..to SYMBOL FOR SUBSTITUTE FORM TWO
863
charset[i++] = c++;
864
// 2440..245F; Optical Character Recognition
865
c = 0x2440; // from OCR HOOK
866
while (c <= 0x244a) // ..to OCR DOUBLE BACKSLASH
867
charset[i++] = c++;
868
// 2460..24FF; Enclosed Alphanumerics
869
c = 0x2460; // from CIRCLED DIGIT ONE
870
while (c <= 0x24ff) // ..to NEGATIVE CIRCLED DIGIT ZERO
871
charset[i++] = c++;
872
// 2500..257F; Box Drawing
873
c = 0x2500; // from BOX DRAWINGS LIGHT HORIZONTAL
874
while (c <= 0x257f) // ..to BOX DRAWINGS HEAVY UP AND LIGHT DOWN
875
charset[i++] = c++;
876
// 2580..259F; Block Elements
877
c = 0x2580; // from UPPER HALF BLOCK
878
while (c <= 0x259f) // ..to QUADRANT UPPER RIGHT AND LOWER LEFT AND LOWER RIGHT
879
charset[i++] = c++;
880
// 25A0..25FF; Geometric Shapes
881
c = 0x25a0; // from BLACK SQUARE
882
while (c <= 0x25ff) // ..to LOWER RIGHT TRIANGLE
883
charset[i++] = c++;
884
// 2600..26FF; Miscellaneous Symbols
885
c = 0x2600; // from BLACK SUN WITH RAYS
886
while (c <= 0x26ff) // ..to WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
887
charset[i++] = c++;
888
// 2700..27BF; Dingbats
889
c = 0x2700; // from BLACK SAFETY SCISSORS
890
while (c <= 0x27bf) // ..to DOUBLE CURLY LOOP
891
charset[i++] = c++;
892
// 27C0..27EF; Miscellaneous Mathematical Symbols-A
893
c = 0x27c0; // from THREE DIMENSIONAL ANGLE
894
while (c <= 0x27ef) // ..to MATHEMATICAL RIGHT FLATTENED PARENTHESIS
895
charset[i++] = c++;
896
// 27F0..27FF; Supplemental Arrows-A
897
c = 0x27f0; // from UPWARDS QUADRUPLE ARROW
898
while (c <= 0x27ff) // ..to LONG RIGHTWARDS SQUIGGLE ARROW
899
charset[i++] = c++;
900
// 2800..28FF; Braille Patterns
901
c = 0x2800; // from BRAILLE PATTERN BLANK
902
while (c <= 0x28ff) // ..to BRAILLE PATTERN DOTS-12345678
903
charset[i++] = c++;
904
// 2900..297F; Supplemental Arrows-B
905
c = 0x2900; // from RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE
906
while (c <= 0x297f) // ..to DOWN FISH TAIL
907
charset[i++] = c++;
908
// 2980..29FF; Miscellaneous Mathematical Symbols-B
909
c = 0x2980; // from TRIPLE VERTICAL BAR DELIMITER
910
while (c <= 0x29ff) // ..to MINY
911
charset[i++] = c++;
912
// 2A00..2AFF; Supplemental Mathematical Operators
913
c = 0x2a00; // from N-ARY CIRCLED DOT OPERATOR
914
while (c <= 0x2aff) // ..to N-ARY WHITE VERTICAL BAR
915
charset[i++] = c++;
916
// 2B00..2BFF; Miscellaneous Symbols and Arrows
917
c = 0x2b00; // from NORTH EAST WHITE ARROW
918
while (c <= 0x2b73) // ..to DOWNWARDS TRIANGLE-HEADED ARROW TO BAR
919
charset[i++] = c++;
920
c = 0x2b76; // from NORTH WEST TRIANGLE-HEADED ARROW TO BAR
921
while (c <= 0x2b95) // ..to RIGHTWARDS BLACK ARROW
922
charset[i++] = c++;
923
c = 0x2b97; // from SYMBOL FOR TYPE A ELECTRONICS
924
while (c <= 0x2bff) // ..to HELLSCHREIBER PAUSE SYMBOL
925
charset[i++] = c++;
926
// 2C00..2C5F; Glagolitic
927
c = 0x2c00; // from GLAGOLITIC CAPITAL LETTER AZU
928
while (c <= 0x2c2e) // ..to GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
929
charset[i++] = c++;
930
c = 0x2c30; // from GLAGOLITIC SMALL LETTER AZU
931
while (c <= 0x2c5e) // ..to GLAGOLITIC SMALL LETTER LATINATE MYSLITE
932
charset[i++] = c++;
933
// 2C60..2C7F; Latin Extended-C
934
c = 0x2c60; // from LATIN CAPITAL LETTER L WITH DOUBLE BAR
935
while (c <= 0x2c7f) // ..to LATIN CAPITAL LETTER Z WITH SWASH TAIL
936
charset[i++] = c++;
937
// 2C80..2CFF; Coptic
938
c = 0x2c80; // from COPTIC CAPITAL LETTER ALFA
939
while (c <= 0x2cf3) // ..to COPTIC SMALL LETTER BOHAIRIC KHEI
940
charset[i++] = c++;
941
c = 0x2cf9; // from COPTIC OLD NUBIAN FULL STOP
942
while (c <= 0x2cff) // ..to COPTIC MORPHOLOGICAL DIVIDER
943
charset[i++] = c++;
944
// 2D00..2D2F; Georgian Supplement
945
c = 0x2d00; // from GEORGIAN SMALL LETTER AN
946
while (c <= 0x2d25) // ..to GEORGIAN SMALL LETTER HOE
947
charset[i++] = c++;
948
c = 0x2d27; // from GEORGIAN SMALL LETTER YN
949
while (c <= 0x2d2d) // ..to GEORGIAN SMALL LETTER AEN
950
charset[i++] = c++;
951
// 2D30..2D7F; Tifinagh
952
c = 0x2d30; // from TIFINAGH LETTER YA
953
while (c <= 0x2d67) // ..to TIFINAGH LETTER YO
954
charset[i++] = c++;
955
charset[i++] = 0x2d6f; // TIFINAGH MODIFIER LETTER LABIALIZATION MARK
956
charset[i++] = 0x2d70; // TIFINAGH SEPARATOR MARK
957
charset[i++] = 0x2d7f; // TIFINAGH CONSONANT JOINER
958
// 2D80..2DDF; Ethiopic Extended
959
c = 0x2d80; // from ETHIOPIC SYLLABLE LOA
960
while (c <= 0x2d96) // ..to ETHIOPIC SYLLABLE GGWE
961
charset[i++] = c++;
962
c = 0x2da0; // from ETHIOPIC SYLLABLE SSA
963
while (c <= 0x2da6) // ..to ETHIOPIC SYLLABLE SSO
964
charset[i++] = c++;
965
c = 0x2da8; // from ETHIOPIC SYLLABLE CCA
966
while (c <= 0x2dae) // ..to ETHIOPIC SYLLABLE CCO
967
charset[i++] = c++;
968
c = 0x2db0; // from ETHIOPIC SYLLABLE ZZA
969
while (c <= 0x2db6) // ..to ETHIOPIC SYLLABLE ZZO
970
charset[i++] = c++;
971
c = 0x2db8; // from ETHIOPIC SYLLABLE CCHA
972
while (c <= 0x2dbe) // ..to ETHIOPIC SYLLABLE CCHO
973
charset[i++] = c++;
974
c = 0x2dc0; // from ETHIOPIC SYLLABLE QYA
975
while (c <= 0x2dc6) // ..to ETHIOPIC SYLLABLE QYO
976
charset[i++] = c++;
977
c = 0x2dc8; // from ETHIOPIC SYLLABLE KYA
978
while (c <= 0x2dce) // ..to ETHIOPIC SYLLABLE KYO
979
charset[i++] = c++;
980
c = 0x2dd0; // from ETHIOPIC SYLLABLE XYA
981
while (c <= 0x2dd6) // ..to ETHIOPIC SYLLABLE XYO
982
charset[i++] = c++;
983
c = 0x2dd8; // from ETHIOPIC SYLLABLE GYA
984
while (c <= 0x2dde) // ..to ETHIOPIC SYLLABLE GYO
985
charset[i++] = c++;
986
// 2DE0..2DFF; Cyrillic Extended-A
987
c = 0x2de0; // from COMBINING CYRILLIC LETTER BE
988
while (c <= 0x2dff) // ..to COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
989
charset[i++] = c++;
990
// 2E00..2E7F; Supplemental Punctuation
991
c = 0x2e00; // from RIGHT ANGLE SUBSTITUTION MARKER
992
while (c <= 0x2e52) // ..to TIRONIAN SIGN CAPITAL ET
993
charset[i++] = c++;
994
// 2E80..2EFF; CJK Radicals Supplement
995
c = 0x2e80; // from CJK RADICAL REPEAT
996
while (c <= 0x2e99) // ..to CJK RADICAL RAP
997
charset[i++] = c++;
998
c = 0x2e9b; // from CJK RADICAL CHOKE
999
while (c <= 0x2ef3) // ..to CJK RADICAL C-SIMPLIFIED TURTLE
1000
charset[i++] = c++;
1001
// 2F00..2FDF; Kangxi Radicals
1002
c = 0x2f00; // from KANGXI RADICAL ONE
1003
while (c <= 0x2fd5) // ..to KANGXI RADICAL FLUTE
1004
charset[i++] = c++;
1005
// 2FF0..2FFF; Ideographic Description Characters
1006
c = 0x2ff0; // from IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT
1007
while (c <= 0x2ffb) // ..to IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
1008
charset[i++] = c++;
1009
// 3000..303F; CJK Symbols and Punctuation
1010
c = 0x3000; // from IDEOGRAPHIC SPACE
1011
while (c <= 0x303f) // ..to IDEOGRAPHIC HALF FILL SPACE
1012
charset[i++] = c++;
1013
// 3040..309F; Hiragana
1014
c = 0x3041; // from HIRAGANA LETTER SMALL A
1015
while (c <= 0x3096) // ..to HIRAGANA LETTER SMALL KE
1016
charset[i++] = c++;
1017
c = 0x3099; // from COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
1018
while (c <= 0x309f) // ..to HIRAGANA DIGRAPH YORI
1019
charset[i++] = c++;
1020
// 30A0..30FF; Katakana
1021
c = 0x30a0; // from KATAKANA-HIRAGANA DOUBLE HYPHEN
1022
while (c <= 0x30ff) // ..to KATAKANA DIGRAPH KOTO
1023
charset[i++] = c++;
1024
// 3100..312F; Bopomofo
1025
c = 0x3105; // from BOPOMOFO LETTER B
1026
while (c <= 0x312f) // ..to BOPOMOFO LETTER NN
1027
charset[i++] = c++;
1028
// 3130..318F; Hangul Compatibility Jamo
1029
c = 0x3131; // from HANGUL LETTER KIYEOK
1030
while (c <= 0x318e) // ..to HANGUL LETTER ARAEAE
1031
charset[i++] = c++;
1032
// 3190..319F; Kanbun
1033
c = 0x3190; // from IDEOGRAPHIC ANNOTATION LINKING MARK
1034
while (c <= 0x319f) // ..to IDEOGRAPHIC ANNOTATION MAN MARK
1035
charset[i++] = c++;
1036
// 31A0..31BF; Bopomofo Extended
1037
c = 0x31a0; // from BOPOMOFO LETTER BU
1038
while (c <= 0x31bf) // ..to BOPOMOFO LETTER AH
1039
charset[i++] = c++;
1040
// 31C0..31EF; CJK Strokes
1041
c = 0x31c0; // from CJK STROKE T
1042
while (c <= 0x31e3) // ..to CJK STROKE Q
1043
charset[i++] = c++;
1044
// 31F0..31FF; Katakana Phonetic Extensions
1045
c = 0x31f0; // from KATAKANA LETTER SMALL KU
1046
while (c <= 0x31ff) // ..to KATAKANA LETTER SMALL RO
1047
charset[i++] = c++;
1048
// 3200..32FF; Enclosed CJK Letters and Months
1049
c = 0x3200; // from PARENTHESIZED HANGUL KIYEOK
1050
while (c <= 0x321e) // ..to PARENTHESIZED KOREAN CHARACTER O HU
1051
charset[i++] = c++;
1052
c = 0x3220; // from PARENTHESIZED IDEOGRAPH ONE
1053
while (c <= 0x32ff) // ..to SQUARE ERA NAME REIWA
1054
charset[i++] = c++;
1055
// 3300..33FF; CJK Compatibility
1056
c = 0x3300; // from SQUARE APAATO
1057
while (c <= 0x33ff) // ..to SQUARE GAL
1058
charset[i++] = c++;
1059
// 3400..4DBF; CJK Unified Ideographs Extension A
1060
c = 0x3400; // from <CJK Ideograph Extension A, First>
1061
while (c <= 0x4dbf) // ..to <CJK Ideograph Extension A, Last>
1062
charset[i++] = c++;
1063
// 4DC0..4DFF; Yijing Hexagram Symbols
1064
c = 0x4dc0; // from HEXAGRAM FOR THE CREATIVE HEAVEN
1065
while (c <= 0x4dff) // ..to HEXAGRAM FOR BEFORE COMPLETION
1066
charset[i++] = c++;
1067
// 4E00..9FFF; CJK Unified Ideographs
1068
c = 0x4e00; // from <CJK Ideograph, First>
1069
while (c <= 0x9ffc) // ..to <CJK Ideograph, Last>
1070
charset[i++] = c++;
1071
// A000..A48F; Yi Syllables
1072
c = 0xa000; // from YI SYLLABLE IT
1073
while (c <= 0xa48c) // ..to YI SYLLABLE YYR
1074
charset[i++] = c++;
1075
// A490..A4CF; Yi Radicals
1076
c = 0xa490; // from YI RADICAL QOT
1077
while (c <= 0xa4c6) // ..to YI RADICAL KE
1078
charset[i++] = c++;
1079
// A4D0..A4FF; Lisu
1080
c = 0xa4d0; // from LISU LETTER BA
1081
while (c <= 0xa4ff) // ..to LISU PUNCTUATION FULL STOP
1082
charset[i++] = c++;
1083
// A500..A63F; Vai
1084
c = 0xa500; // from VAI SYLLABLE EE
1085
while (c <= 0xa62b) // ..to VAI SYLLABLE NDOLE DO
1086
charset[i++] = c++;
1087
// A640..A69F; Cyrillic Extended-B
1088
c = 0xa640; // from CYRILLIC CAPITAL LETTER ZEMLYA
1089
while (c <= 0xa69f) // ..to COMBINING CYRILLIC LETTER IOTIFIED E
1090
charset[i++] = c++;
1091
// A6A0..A6FF; Bamum
1092
c = 0xa6a0; // from BAMUM LETTER A
1093
while (c <= 0xa6f7) // ..to BAMUM QUESTION MARK
1094
charset[i++] = c++;
1095
// A700..A71F; Modifier Tone Letters
1096
c = 0xa700; // from MODIFIER LETTER CHINESE TONE YIN PING
1097
while (c <= 0xa71f) // ..to MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
1098
charset[i++] = c++;
1099
// A720..A7FF; Latin Extended-D
1100
c = 0xa720; // from MODIFIER LETTER STRESS AND HIGH TONE
1101
while (c <= 0xa7bf) // ..to LATIN SMALL LETTER GLOTTAL U
1102
charset[i++] = c++;
1103
c = 0xa7c2; // from LATIN CAPITAL LETTER ANGLICANA W
1104
while (c <= 0xa7ca) // ..to LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
1105
charset[i++] = c++;
1106
c = 0xa7f5; // from LATIN CAPITAL LETTER REVERSED HALF H
1107
while (c <= 0xa7ff) // ..to LATIN EPIGRAPHIC LETTER ARCHAIC M
1108
charset[i++] = c++;
1109
// A800..A82F; Syloti Nagri
1110
c = 0xa800; // from SYLOTI NAGRI LETTER A
1111
while (c <= 0xa82c) // ..to SYLOTI NAGRI SIGN ALTERNATE HASANTA
1112
charset[i++] = c++;
1113
// A830..A83F; Common Indic Number Forms
1114
c = 0xa830; // from NORTH INDIC FRACTION ONE QUARTER
1115
while (c <= 0xa839) // ..to NORTH INDIC QUANTITY MARK
1116
charset[i++] = c++;
1117
// A840..A87F; Phags-pa
1118
c = 0xa840; // from PHAGS-PA LETTER KA
1119
while (c <= 0xa877) // ..to PHAGS-PA MARK DOUBLE SHAD
1120
charset[i++] = c++;
1121
// A880..A8DF; Saurashtra
1122
c = 0xa880; // from SAURASHTRA SIGN ANUSVARA
1123
while (c <= 0xa8c5) // ..to SAURASHTRA SIGN CANDRABINDU
1124
charset[i++] = c++;
1125
c = 0xa8ce; // from SAURASHTRA DANDA
1126
while (c <= 0xa8d9) // ..to SAURASHTRA DIGIT NINE
1127
charset[i++] = c++;
1128
// A8E0..A8FF; Devanagari Extended
1129
c = 0xa8e0; // from COMBINING DEVANAGARI DIGIT ZERO
1130
while (c <= 0xa8ff) // ..to DEVANAGARI VOWEL SIGN AY
1131
charset[i++] = c++;
1132
// A900..A92F; Kayah Li
1133
c = 0xa900; // from KAYAH LI DIGIT ZERO
1134
while (c <= 0xa92f) // ..to KAYAH LI SIGN SHYA
1135
charset[i++] = c++;
1136
// A930..A95F; Rejang
1137
c = 0xa930; // from REJANG LETTER KA
1138
while (c <= 0xa953) // ..to REJANG VIRAMA
1139
charset[i++] = c++;
1140
charset[i++] = 0xa95f; // REJANG SECTION MARK
1141
// A960..A97F; Hangul Jamo Extended-A
1142
c = 0xa960; // from HANGUL CHOSEONG TIKEUT-MIEUM
1143
while (c <= 0xa97c) // ..to HANGUL CHOSEONG SSANGYEORINHIEUH
1144
charset[i++] = c++;
1145
// A980..A9DF; Javanese
1146
c = 0xa980; // from JAVANESE SIGN PANYANGGA
1147
while (c <= 0xa9cd) // ..to JAVANESE TURNED PADA PISELEH
1148
charset[i++] = c++;
1149
c = 0xa9cf; // from JAVANESE PANGRANGKEP
1150
while (c <= 0xa9d9) // ..to JAVANESE DIGIT NINE
1151
charset[i++] = c++;
1152
charset[i++] = 0xa9de; // JAVANESE PADA TIRTA TUMETES
1153
charset[i++] = 0xa9df; // JAVANESE PADA ISEN-ISEN
1154
// A9E0..A9FF; Myanmar Extended-B
1155
c = 0xa9e0; // from MYANMAR LETTER SHAN GHA
1156
while (c <= 0xa9fe) // ..to MYANMAR LETTER TAI LAING BHA
1157
charset[i++] = c++;
1158
// AA00..AA5F; Cham
1159
c = 0xaa00; // from CHAM LETTER A
1160
while (c <= 0xaa36) // ..to CHAM CONSONANT SIGN WA
1161
charset[i++] = c++;
1162
c = 0xaa40; // from CHAM LETTER FINAL K
1163
while (c <= 0xaa4d) // ..to CHAM CONSONANT SIGN FINAL H
1164
charset[i++] = c++;
1165
c = 0xaa50; // from CHAM DIGIT ZERO
1166
while (c <= 0xaa59) // ..to CHAM DIGIT NINE
1167
charset[i++] = c++;
1168
c = 0xaa5c; // from CHAM PUNCTUATION SPIRAL
1169
while (c <= 0xaa5f) // ..to CHAM PUNCTUATION TRIPLE DANDA
1170
charset[i++] = c++;
1171
// AA60..AA7F; Myanmar Extended-A
1172
c = 0xaa60; // from MYANMAR LETTER KHAMTI GA
1173
while (c <= 0xaa7f) // ..to MYANMAR LETTER SHWE PALAUNG SHA
1174
charset[i++] = c++;
1175
// AA80..AADF; Tai Viet
1176
c = 0xaa80; // from TAI VIET LETTER LOW KO
1177
while (c <= 0xaac2) // ..to TAI VIET TONE MAI SONG
1178
charset[i++] = c++;
1179
c = 0xaadb; // from TAI VIET SYMBOL KON
1180
while (c <= 0xaadf) // ..to TAI VIET SYMBOL KOI KOI
1181
charset[i++] = c++;
1182
// AAE0..AAFF; Meetei Mayek Extensions
1183
c = 0xaae0; // from MEETEI MAYEK LETTER E
1184
while (c <= 0xaaf6) // ..to MEETEI MAYEK VIRAMA
1185
charset[i++] = c++;
1186
// AB00..AB2F; Ethiopic Extended-A
1187
c = 0xab01; // from ETHIOPIC SYLLABLE TTHU
1188
while (c <= 0xab06) // ..to ETHIOPIC SYLLABLE TTHO
1189
charset[i++] = c++;
1190
c = 0xab09; // from ETHIOPIC SYLLABLE DDHU
1191
while (c <= 0xab0e) // ..to ETHIOPIC SYLLABLE DDHO
1192
charset[i++] = c++;
1193
c = 0xab11; // from ETHIOPIC SYLLABLE DZU
1194
while (c <= 0xab16) // ..to ETHIOPIC SYLLABLE DZO
1195
charset[i++] = c++;
1196
c = 0xab20; // from ETHIOPIC SYLLABLE CCHHA
1197
while (c <= 0xab26) // ..to ETHIOPIC SYLLABLE CCHHO
1198
charset[i++] = c++;
1199
c = 0xab28; // from ETHIOPIC SYLLABLE BBA
1200
while (c <= 0xab2e) // ..to ETHIOPIC SYLLABLE BBO
1201
charset[i++] = c++;
1202
// AB30..AB6F; Latin Extended-E
1203
c = 0xab30; // from LATIN SMALL LETTER BARRED ALPHA
1204
while (c <= 0xab6b) // ..to MODIFIER LETTER RIGHT TACK
1205
charset[i++] = c++;
1206
// AB70..ABBF; Cherokee Supplement
1207
c = 0xab70; // from CHEROKEE SMALL LETTER A
1208
while (c <= 0xabbf) // ..to CHEROKEE SMALL LETTER YA
1209
charset[i++] = c++;
1210
// ABC0..ABFF; Meetei Mayek
1211
c = 0xabc0; // from MEETEI MAYEK LETTER KOK
1212
while (c <= 0xabed) // ..to MEETEI MAYEK APUN IYEK
1213
charset[i++] = c++;
1214
c = 0xabf0; // from MEETEI MAYEK DIGIT ZERO
1215
while (c <= 0xabf9) // ..to MEETEI MAYEK DIGIT NINE
1216
charset[i++] = c++;
1217
// AC00..D7AF; Hangul Syllables
1218
c = 0xac00; // from <Hangul Syllable, First>
1219
while (c <= 0xd7a3) // ..to <Hangul Syllable, Last>
1220
charset[i++] = c++;
1221
// D7B0..D7FF; Hangul Jamo Extended-B
1222
c = 0xd7b0; // from HANGUL JUNGSEONG O-YEO
1223
while (c <= 0xd7c6) // ..to HANGUL JUNGSEONG ARAEA-E
1224
charset[i++] = c++;
1225
c = 0xd7cb; // from HANGUL JONGSEONG NIEUN-RIEUL
1226
while (c <= 0xd7fb) // ..to HANGUL JONGSEONG PHIEUPH-THIEUTH
1227
charset[i++] = c++;
1228
// D800..DB7F; High Surrogates
1229
// DB80..DBFF; High Private Use Surrogates
1230
// DC00..DFFF; Low Surrogates
1231
// E000..F8FF; Private Use Area
1232
// F900..FAFF; CJK Compatibility Ideographs
1233
c = 0xf900; // from CJK COMPATIBILITY IDEOGRAPH-F900
1234
while (c <= 0xfa6d) // ..to CJK COMPATIBILITY IDEOGRAPH-FA6D
1235
charset[i++] = c++;
1236
c = 0xfa70; // from CJK COMPATIBILITY IDEOGRAPH-FA70
1237
while (c <= 0xfad9) // ..to CJK COMPATIBILITY IDEOGRAPH-FAD9
1238
charset[i++] = c++;
1239
// FB00..FB4F; Alphabetic Presentation Forms
1240
c = 0xfb00; // from LATIN SMALL LIGATURE FF
1241
while (c <= 0xfb06) // ..to LATIN SMALL LIGATURE ST
1242
charset[i++] = c++;
1243
c = 0xfb13; // from ARMENIAN SMALL LIGATURE MEN NOW
1244
while (c <= 0xfb17) // ..to ARMENIAN SMALL LIGATURE MEN XEH
1245
charset[i++] = c++;
1246
c = 0xfb1d; // from HEBREW LETTER YOD WITH HIRIQ
1247
while (c <= 0xfb36) // ..to HEBREW LETTER ZAYIN WITH DAGESH
1248
charset[i++] = c++;
1249
c = 0xfb38; // from HEBREW LETTER TET WITH DAGESH
1250
while (c <= 0xfb3c) // ..to HEBREW LETTER LAMED WITH DAGESH
1251
charset[i++] = c++;
1252
charset[i++] = 0xfb40; // HEBREW LETTER NUN WITH DAGESH
1253
charset[i++] = 0xfb41; // HEBREW LETTER SAMEKH WITH DAGESH
1254
charset[i++] = 0xfb43; // HEBREW LETTER FINAL PE WITH DAGESH
1255
charset[i++] = 0xfb44; // HEBREW LETTER PE WITH DAGESH
1256
c = 0xfb46; // from HEBREW LETTER TSADI WITH DAGESH
1257
while (c <= 0xfb4f) // ..to HEBREW LIGATURE ALEF LAMED
1258
charset[i++] = c++;
1259
// FB50..FDFF; Arabic Presentation Forms-A
1260
c = 0xfb50; // from ARABIC LETTER ALEF WASLA ISOLATED FORM
1261
while (c <= 0xfbc1) // ..to ARABIC SYMBOL SMALL TAH BELOW
1262
charset[i++] = c++;
1263
c = 0xfbd3; // from ARABIC LETTER NG ISOLATED FORM
1264
while (c <= 0xfd3f) // ..to ORNATE RIGHT PARENTHESIS
1265
charset[i++] = c++;
1266
c = 0xfd50; // from ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM
1267
while (c <= 0xfd8f) // ..to ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
1268
charset[i++] = c++;
1269
c = 0xfd92; // from ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM
1270
while (c <= 0xfdc7) // ..to ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
1271
charset[i++] = c++;
1272
c = 0xfdf0; // from ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM
1273
while (c <= 0xfdfd) // ..to ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
1274
charset[i++] = c++;
1275
// FE00..FE0F; Variation Selectors
1276
c = 0xfe00; // from VARIATION SELECTOR-1
1277
while (c <= 0xfe0f) // ..to VARIATION SELECTOR-16
1278
charset[i++] = c++;
1279
// FE10..FE1F; Vertical Forms
1280
c = 0xfe10; // from PRESENTATION FORM FOR VERTICAL COMMA
1281
while (c <= 0xfe19) // ..to PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS
1282
charset[i++] = c++;
1283
// FE20..FE2F; Combining Half Marks
1284
c = 0xfe20; // from COMBINING LIGATURE LEFT HALF
1285
while (c <= 0xfe2f) // ..to COMBINING CYRILLIC TITLO RIGHT HALF
1286
charset[i++] = c++;
1287
// FE30..FE4F; CJK Compatibility Forms
1288
c = 0xfe30; // from PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
1289
while (c <= 0xfe4f) // ..to WAVY LOW LINE
1290
charset[i++] = c++;
1291
// FE50..FE6F; Small Form Variants
1292
charset[i++] = 0xfe50; // SMALL COMMA
1293
charset[i++] = 0xfe52; // SMALL FULL STOP
1294
c = 0xfe54; // from SMALL SEMICOLON
1295
while (c <= 0xfe66) // ..to SMALL EQUALS SIGN
1296
charset[i++] = c++;
1297
c = 0xfe68; // from SMALL REVERSE SOLIDUS
1298
while (c <= 0xfe6b) // ..to SMALL COMMERCIAL AT
1299
charset[i++] = c++;
1300
// FE70..FEFF; Arabic Presentation Forms-B
1301
c = 0xfe70; // from ARABIC FATHATAN ISOLATED FORM
1302
while (c <= 0xfe74) // ..to ARABIC KASRATAN ISOLATED FORM
1303
charset[i++] = c++;
1304
c = 0xfe76; // from ARABIC FATHA ISOLATED FORM
1305
while (c <= 0xfefc) // ..to ARABIC LIGATURE LAM WITH ALEF FINAL FORM
1306
charset[i++] = c++;
1307
charset[i++] = 0xfeff; // ZERO WIDTH NO-BREAK SPACE
1308
// FF00..FFEF; Halfwidth and Fullwidth Forms
1309
c = 0xff01; // from FULLWIDTH EXCLAMATION MARK
1310
while (c <= 0xffbe) // ..to HALFWIDTH HANGUL LETTER HIEUH
1311
charset[i++] = c++;
1312
c = 0xffc2; // from HALFWIDTH HANGUL LETTER A
1313
while (c <= 0xffc7) // ..to HALFWIDTH HANGUL LETTER E
1314
charset[i++] = c++;
1315
c = 0xffca; // from HALFWIDTH HANGUL LETTER YEO
1316
while (c <= 0xffcf) // ..to HALFWIDTH HANGUL LETTER OE
1317
charset[i++] = c++;
1318
c = 0xffd2; // from HALFWIDTH HANGUL LETTER YO
1319
while (c <= 0xffd7) // ..to HALFWIDTH HANGUL LETTER YU
1320
charset[i++] = c++;
1321
charset[i++] = 0xffda; // HALFWIDTH HANGUL LETTER EU
1322
charset[i++] = 0xffdc; // HALFWIDTH HANGUL LETTER I
1323
c = 0xffe0; // from FULLWIDTH CENT SIGN
1324
while (c <= 0xffe6) // ..to FULLWIDTH WON SIGN
1325
charset[i++] = c++;
1326
c = 0xffe8; // from HALFWIDTH FORMS LIGHT VERTICAL
1327
while (c <= 0xffee) // ..to HALFWIDTH WHITE CIRCLE
1328
charset[i++] = c++;
1329
// FFF0..FFFF; Specials
1330
c = 0xfff9; // from INTERLINEAR ANNOTATION ANCHOR
1331
while (c <= 0xfffd) // ..to REPLACEMENT CHARACTER
1332
charset[i++] = c++;
1333
1334
/* Zero-terminate it, and cache the first character */
1335
charset[i] = 0;
1336
c0 = charset[0];
1337
1338
last = minlength - 1;
1339
i = 0;
1340
while (i <= last) {
1341
id[i] = 0;
1342
word[i++] = c0;
1343
}
1344
lastid = -1;
1345
word[i] = 0;
1346
1347
/* We must init word with dummy data, it doesn't get set until filter() */
1348
word = 1;
1349
}
1350
1351
void generate()
1352
{
1353
int i;
1354
1355
/* Handle the typical case specially */
1356
if (word[last] = charset[++lastid]) return;
1357
1358
lastid = 0;
1359
word[i = last] = c0;
1360
while (i--) { // Have a preceding position?
1361
if (word[i] = charset[++id[i]]) return;
1362
id[i] = 0;
1363
word[i] = c0;
1364
}
1365
1366
if (++last < maxlength) { // Next length?
1367
id[last] = lastid = 0;
1368
word[last] = c0;
1369
word[last + 1] = 0;
1370
} else // We're done
1371
word = 0;
1372
}
1373
1374
void restore()
1375
{
1376
int i, c;
1377
1378
/* Calculate the current length and infer the character indices */
1379
last = 0;
1380
while (c = word[last]) {
1381
i = 0; while (charset[i] != c && charset[i]) i++;
1382
if (!charset[i]) i = 0; // Not found
1383
id[last++] = i;
1384
}
1385
lastid = id[--last];
1386
}
1387
1388