CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Sign Up | Sign In
rapid7

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place.

GitHub Repository: rapid7/metasploit-framework
Path: blob/master/lib/rbmysql/charset.rb
Views: 11765
1
# coding: ascii-8bit
2
# Copyright (C) 2008-2012 TOMITA Masahiro
3
# mailto:[email protected]
4
5
#
6
class RbMysql
  # One MySQL charset/collation pair, plus lookup tables and helpers that map
  # MySQL charset names to Ruby encodings.
  #
  # @!attribute [r] number
  #   @private
  # @!attribute [r] name
  #   @return [String] charset name
  # @!attribute [r] csname
  #   @return [String] collation name
  class Charset
    # @private
    # @param [Integer] number
    # @param [String] name
    # @param [String] csname
    def initialize(number, name, csname)
      @number = number
      @name = name
      @csname = csname
      @unsafe = false
    end

    attr_reader :number, :name, :csname

    # Set to true for charsets whose name appears in UNSAFE_CHARSET.
    # @private
    attr_accessor :unsafe

    # [[charset_number, charset_name, collation_name, default], ...]
    # @private
    CHARSETS = [
      [  1, "big5", "big5_chinese_ci", true],
      [  2, "latin2", "latin2_czech_cs", false],
      [  3, "dec8", "dec8_swedish_ci", true],
      [  4, "cp850", "cp850_general_ci", true],
      [  5, "latin1", "latin1_german1_ci", false],
      [  6, "hp8", "hp8_english_ci", true],
      [  7, "koi8r", "koi8r_general_ci", true],
      [  8, "latin1", "latin1_swedish_ci", true],
      [  9, "latin2", "latin2_general_ci", true],
      [ 10, "swe7", "swe7_swedish_ci", true],
      [ 11, "ascii", "ascii_general_ci", true],
      [ 12, "ujis", "ujis_japanese_ci", true],
      [ 13, "sjis", "sjis_japanese_ci", true],
      [ 14, "cp1251", "cp1251_bulgarian_ci", false],
      [ 15, "latin1", "latin1_danish_ci", false],
      [ 16, "hebrew", "hebrew_general_ci", true],
      [ 17, "filename", "filename", true],
      [ 18, "tis620", "tis620_thai_ci", true],
      [ 19, "euckr", "euckr_korean_ci", true],
      [ 20, "latin7", "latin7_estonian_cs", false],
      [ 21, "latin2", "latin2_hungarian_ci", false],
      [ 22, "koi8u", "koi8u_general_ci", true],
      [ 23, "cp1251", "cp1251_ukrainian_ci", false],
      [ 24, "gb2312", "gb2312_chinese_ci", true],
      [ 25, "greek", "greek_general_ci", true],
      [ 26, "cp1250", "cp1250_general_ci", true],
      [ 27, "latin2", "latin2_croatian_ci", false],
      [ 28, "gbk", "gbk_chinese_ci", true],
      [ 29, "cp1257", "cp1257_lithuanian_ci", false],
      [ 30, "latin5", "latin5_turkish_ci", true],
      [ 31, "latin1", "latin1_german2_ci", false],
      [ 32, "armscii8", "armscii8_general_ci", true],
      [ 33, "utf8", "utf8_general_ci", true],
      [ 34, "cp1250", "cp1250_czech_cs", false],
      [ 35, "ucs2", "ucs2_general_ci", true],
      [ 36, "cp866", "cp866_general_ci", true],
      [ 37, "keybcs2", "keybcs2_general_ci", true],
      [ 38, "macce", "macce_general_ci", true],
      [ 39, "macroman", "macroman_general_ci", true],
      [ 40, "cp852", "cp852_general_ci", true],
      [ 41, "latin7", "latin7_general_ci", true],
      [ 42, "latin7", "latin7_general_cs", false],
      [ 43, "macce", "macce_bin", false],
      [ 44, "cp1250", "cp1250_croatian_ci", false],
      [ 45, "utf8mb4", "utf8mb4_general_ci", true],
      [ 46, "utf8mb4", "utf8mb4_bin", false],
      [ 47, "latin1", "latin1_bin", false],
      [ 48, "latin1", "latin1_general_ci", false],
      [ 49, "latin1", "latin1_general_cs", false],
      [ 50, "cp1251", "cp1251_bin", false],
      [ 51, "cp1251", "cp1251_general_ci", true],
      [ 52, "cp1251", "cp1251_general_cs", false],
      [ 53, "macroman", "macroman_bin", false],
      [ 54, "utf16", "utf16_general_ci", true],
      [ 55, "utf16", "utf16_bin", false],
      [ 56, "utf16le", "utf16le_general_ci", true],
      [ 57, "cp1256", "cp1256_general_ci", true],
      [ 58, "cp1257", "cp1257_bin", false],
      [ 59, "cp1257", "cp1257_general_ci", true],
      [ 60, "utf32", "utf32_general_ci", true],
      [ 61, "utf32", "utf32_bin", false],
      [ 62, "utf16le", "utf16le_bin", false],
      [ 63, "binary", "binary", true],
      [ 64, "armscii8", "armscii8_bin", false],
      [ 65, "ascii", "ascii_bin", false],
      [ 66, "cp1250", "cp1250_bin", false],
      [ 67, "cp1256", "cp1256_bin", false],
      [ 68, "cp866", "cp866_bin", false],
      [ 69, "dec8", "dec8_bin", false],
      [ 70, "greek", "greek_bin", false],
      [ 71, "hebrew", "hebrew_bin", false],
      [ 72, "hp8", "hp8_bin", false],
      [ 73, "keybcs2", "keybcs2_bin", false],
      [ 74, "koi8r", "koi8r_bin", false],
      [ 75, "koi8u", "koi8u_bin", false],
      [ 77, "latin2", "latin2_bin", false],
      [ 78, "latin5", "latin5_bin", false],
      [ 79, "latin7", "latin7_bin", false],
      [ 80, "cp850", "cp850_bin", false],
      [ 81, "cp852", "cp852_bin", false],
      [ 82, "swe7", "swe7_bin", false],
      [ 83, "utf8", "utf8_bin", false],
      [ 84, "big5", "big5_bin", false],
      [ 85, "euckr", "euckr_bin", false],
      [ 86, "gb2312", "gb2312_bin", false],
      [ 87, "gbk", "gbk_bin", false],
      [ 88, "sjis", "sjis_bin", false],
      [ 89, "tis620", "tis620_bin", false],
      [ 90, "ucs2", "ucs2_bin", false],
      [ 91, "ujis", "ujis_bin", false],
      [ 92, "geostd8", "geostd8_general_ci", true],
      [ 93, "geostd8", "geostd8_bin", false],
      [ 94, "latin1", "latin1_spanish_ci", false],
      [ 95, "cp932", "cp932_japanese_ci", true],
      [ 96, "cp932", "cp932_bin", false],
      [ 97, "eucjpms", "eucjpms_japanese_ci", true],
      [ 98, "eucjpms", "eucjpms_bin", false],
      [ 99, "cp1250", "cp1250_polish_ci", false],
      [101, "utf16", "utf16_unicode_ci", false],
      [102, "utf16", "utf16_icelandic_ci", false],
      [103, "utf16", "utf16_latvian_ci", false],
      [104, "utf16", "utf16_romanian_ci", false],
      [105, "utf16", "utf16_slovenian_ci", false],
      [106, "utf16", "utf16_polish_ci", false],
      [107, "utf16", "utf16_estonian_ci", false],
      [108, "utf16", "utf16_spanish_ci", false],
      [109, "utf16", "utf16_swedish_ci", false],
      [110, "utf16", "utf16_turkish_ci", false],
      [111, "utf16", "utf16_czech_ci", false],
      [112, "utf16", "utf16_danish_ci", false],
      [113, "utf16", "utf16_lithuanian_ci", false],
      [114, "utf16", "utf16_slovak_ci", false],
      [115, "utf16", "utf16_spanish2_ci", false],
      [116, "utf16", "utf16_roman_ci", false],
      [117, "utf16", "utf16_persian_ci", false],
      [118, "utf16", "utf16_esperanto_ci", false],
      [119, "utf16", "utf16_hungarian_ci", false],
      [120, "utf16", "utf16_sinhala_ci", false],
      [121, "utf16", "utf16_german2_ci", false],
      [122, "utf16", "utf16_croatian_ci", false],
      [123, "utf16", "utf16_unicode_520_ci", false],
      [124, "utf16", "utf16_vietnamese_ci", false],
      [128, "ucs2", "ucs2_unicode_ci", false],
      [129, "ucs2", "ucs2_icelandic_ci", false],
      [130, "ucs2", "ucs2_latvian_ci", false],
      [131, "ucs2", "ucs2_romanian_ci", false],
      [132, "ucs2", "ucs2_slovenian_ci", false],
      [133, "ucs2", "ucs2_polish_ci", false],
      [134, "ucs2", "ucs2_estonian_ci", false],
      [135, "ucs2", "ucs2_spanish_ci", false],
      [136, "ucs2", "ucs2_swedish_ci", false],
      [137, "ucs2", "ucs2_turkish_ci", false],
      [138, "ucs2", "ucs2_czech_ci", false],
      [139, "ucs2", "ucs2_danish_ci", false],
      [140, "ucs2", "ucs2_lithuanian_ci", false],
      [141, "ucs2", "ucs2_slovak_ci", false],
      [142, "ucs2", "ucs2_spanish2_ci", false],
      [143, "ucs2", "ucs2_roman_ci", false],
      [144, "ucs2", "ucs2_persian_ci", false],
      [145, "ucs2", "ucs2_esperanto_ci", false],
      [146, "ucs2", "ucs2_hungarian_ci", false],
      [147, "ucs2", "ucs2_sinhala_ci", false],
      [148, "ucs2", "ucs2_german2_ci", false],
      [149, "ucs2", "ucs2_croatian_ci", false],
      [150, "ucs2", "ucs2_unicode_520_ci", false],
      [151, "ucs2", "ucs2_vietnamese_ci", false],
      [159, "ucs2", "ucs2_general_mysql500_ci", false],
      [160, "utf32", "utf32_unicode_ci", false],
      [161, "utf32", "utf32_icelandic_ci", false],
      [162, "utf32", "utf32_latvian_ci", false],
      [163, "utf32", "utf32_romanian_ci", false],
      [164, "utf32", "utf32_slovenian_ci", false],
      [165, "utf32", "utf32_polish_ci", false],
      [166, "utf32", "utf32_estonian_ci", false],
      [167, "utf32", "utf32_spanish_ci", false],
      [168, "utf32", "utf32_swedish_ci", false],
      [169, "utf32", "utf32_turkish_ci", false],
      [170, "utf32", "utf32_czech_ci", false],
      [171, "utf32", "utf32_danish_ci", false],
      [172, "utf32", "utf32_lithuanian_ci", false],
      [173, "utf32", "utf32_slovak_ci", false],
      [174, "utf32", "utf32_spanish2_ci", false],
      [175, "utf32", "utf32_roman_ci", false],
      [176, "utf32", "utf32_persian_ci", false],
      [177, "utf32", "utf32_esperanto_ci", false],
      [178, "utf32", "utf32_hungarian_ci", false],
      [179, "utf32", "utf32_sinhala_ci", false],
      [180, "utf32", "utf32_german2_ci", false],
      [181, "utf32", "utf32_croatian_ci", false],
      [182, "utf32", "utf32_unicode_520_ci", false],
      [183, "utf32", "utf32_vietnamese_ci", false],
      [192, "utf8", "utf8_unicode_ci", false],
      [193, "utf8", "utf8_icelandic_ci", false],
      [194, "utf8", "utf8_latvian_ci", false],
      [195, "utf8", "utf8_romanian_ci", false],
      [196, "utf8", "utf8_slovenian_ci", false],
      [197, "utf8", "utf8_polish_ci", false],
      [198, "utf8", "utf8_estonian_ci", false],
      [199, "utf8", "utf8_spanish_ci", false],
      [200, "utf8", "utf8_swedish_ci", false],
      [201, "utf8", "utf8_turkish_ci", false],
      [202, "utf8", "utf8_czech_ci", false],
      [203, "utf8", "utf8_danish_ci", false],
      [204, "utf8", "utf8_lithuanian_ci", false],
      [205, "utf8", "utf8_slovak_ci", false],
      [206, "utf8", "utf8_spanish2_ci", false],
      [207, "utf8", "utf8_roman_ci", false],
      [208, "utf8", "utf8_persian_ci", false],
      [209, "utf8", "utf8_esperanto_ci", false],
      [210, "utf8", "utf8_hungarian_ci", false],
      [211, "utf8", "utf8_sinhala_ci", false],
      [212, "utf8", "utf8_german2_ci", false],
      [213, "utf8", "utf8_croatian_ci", false],
      [214, "utf8", "utf8_unicode_520_ci", false],
      [215, "utf8", "utf8_vietnamese_ci", false],
      [223, "utf8", "utf8_general_mysql500_ci", false],
      [224, "utf8mb4", "utf8mb4_unicode_ci", false],
      [225, "utf8mb4", "utf8mb4_icelandic_ci", false],
      [226, "utf8mb4", "utf8mb4_latvian_ci", false],
      [227, "utf8mb4", "utf8mb4_romanian_ci", false],
      [228, "utf8mb4", "utf8mb4_slovenian_ci", false],
      [229, "utf8mb4", "utf8mb4_polish_ci", false],
      [230, "utf8mb4", "utf8mb4_estonian_ci", false],
      [231, "utf8mb4", "utf8mb4_spanish_ci", false],
      [232, "utf8mb4", "utf8mb4_swedish_ci", false],
      [233, "utf8mb4", "utf8mb4_turkish_ci", false],
      [234, "utf8mb4", "utf8mb4_czech_ci", false],
      [235, "utf8mb4", "utf8mb4_danish_ci", false],
      [236, "utf8mb4", "utf8mb4_lithuanian_ci", false],
      [237, "utf8mb4", "utf8mb4_slovak_ci", false],
      [238, "utf8mb4", "utf8mb4_spanish2_ci", false],
      [239, "utf8mb4", "utf8mb4_roman_ci", false],
      [240, "utf8mb4", "utf8mb4_persian_ci", false],
      [241, "utf8mb4", "utf8mb4_esperanto_ci", false],
      [242, "utf8mb4", "utf8mb4_hungarian_ci", false],
      [243, "utf8mb4", "utf8mb4_sinhala_ci", false],
      [244, "utf8mb4", "utf8mb4_german2_ci", false],
      [245, "utf8mb4", "utf8mb4_croatian_ci", false],
      [246, "utf8mb4", "utf8mb4_unicode_520_ci", false],
      [247, "utf8mb4", "utf8mb4_vietnamese_ci", false],
      [248, "gb18030", "gb18030_chinese_ci", true],
      [249, "gb18030", "gb18030_bin", false],
      [250, "gb18030", "gb18030_unicode_520_ci", false],
      [254, "utf8", "utf8_general_cs", false],
    ]

    # Charset names whose Charset objects get +unsafe+ set to true.
    # @private
    UNSAFE_CHARSET = [
      "big5", "sjis", "filename", "gbk", "ucs2", "cp932",
    ]

    # charset number => Charset
    # @private
    NUMBER_TO_CHARSET = {}
    # collation name => Charset
    # @private
    COLLATION_TO_CHARSET = {}
    # charset name => Charset for the row flagged as default in CHARSETS
    # @private
    CHARSET_DEFAULT = {}

    # Build the three lookup tables from the CHARSETS rows.
    CHARSETS.each do |number, csname, clname, default|
      cs = Charset.new(number, csname, clname)
      cs.unsafe = true if UNSAFE_CHARSET.include?(csname)
      NUMBER_TO_CHARSET[number] = cs
      COLLATION_TO_CHARSET[clname] = cs
      CHARSET_DEFAULT[csname] = cs if default
    end

    # @private
    BINARY_CHARSET_NUMBER = CHARSET_DEFAULT['binary'].number

    # Look up a charset by its MySQL charset number.
    # @private
    # @param [Integer] n
    # @return [RbMysql::Charset]
    # @raise [RbMysql::ClientError] if the number is not registered
    def self.by_number(n)
      NUMBER_TO_CHARSET.fetch(n) { raise ClientError, "unknown charset number: #{n}" }
    end

    # Look up a charset by collation name, falling back to the default
    # collation of a charset name.
    # @private
    # @param [String] str
    # @return [RbMysql::Charset]
    # @raise [RbMysql::ClientError] if neither table knows the name
    def self.by_name(str)
      COLLATION_TO_CHARSET[str] || CHARSET_DEFAULT[str] ||
        raise(ClientError, "unknown charset: #{str}")
    end

    if defined? Encoding

      # @private
      # MySQL Charset -> Ruby's Encoding
      # A nil value marks a charset that is known but has no Ruby encoding.
      CHARSET_ENCODING = {
        "armscii8" => nil,
        "ascii"    => Encoding::US_ASCII,
        "big5"     => Encoding::Big5,
        "binary"   => Encoding::ASCII_8BIT,
        "cp1250"   => Encoding::Windows_1250,
        "cp1251"   => Encoding::Windows_1251,
        "cp1256"   => Encoding::Windows_1256,
        "cp1257"   => Encoding::Windows_1257,
        "cp850"    => Encoding::CP850,
        "cp852"    => Encoding::CP852,
        "cp866"    => Encoding::IBM866,
        "cp932"    => Encoding::Windows_31J,
        "dec8"     => nil,
        "eucjpms"  => Encoding::EucJP_ms,
        "euckr"    => Encoding::EUC_KR,
        "gb18030"  => Encoding::GB18030,
        "gb2312"   => Encoding::EUC_CN,
        "gbk"      => Encoding::GBK,
        "geostd8"  => nil,
        "greek"    => Encoding::ISO_8859_7,
        "hebrew"   => Encoding::ISO_8859_8,
        "hp8"      => nil,
        "keybcs2"  => nil,
        "koi8r"    => Encoding::KOI8_R,
        "koi8u"    => Encoding::KOI8_U,
        "latin1"   => Encoding::ISO_8859_1,
        "latin2"   => Encoding::ISO_8859_2,
        "latin5"   => Encoding::ISO_8859_9,
        "latin7"   => Encoding::ISO_8859_13,
        "macce"    => Encoding::MacCentEuro,
        "macroman" => Encoding::MacRoman,
        "sjis"     => Encoding::SHIFT_JIS,
        "swe7"     => nil,
        "tis620"   => Encoding::TIS_620,
        "ucs2"     => Encoding::UTF_16BE,
        "ujis"     => Encoding::EucJP_ms,
        # MySQL's utf16 and utf32 are big-endian; utf16le is the little-endian form.
        "utf16"    => Encoding::UTF_16BE,
        "utf16le"  => Encoding::UTF_16LE,
        "utf32"    => Encoding::UTF_32BE,
        "utf8"     => Encoding::UTF_8,
        "utf8mb4"  => Encoding::UTF_8,
      }

      # Tag a string as ASCII-8BIT without changing its bytes.
      # A mutable string is retagged in place and returned; a frozen string
      # cannot be retagged, so a retagged copy is returned instead.
      # @private
      # @param [String] value
      # @return [String]
      def self.to_binary(value)
        target = value.frozen? ? value.dup : value
        target.force_encoding Encoding::ASCII_8BIT
      end

      # @private
      # convert raw to encoding and convert to Encoding.default_internal
      # Note: raw itself is retagged in place by force_encoding.
      # @param [String] raw
      # @param [Encoding] encoding
      # @return [String] result
      def self.convert_encoding(raw, encoding)
        raw.force_encoding(encoding).encode
      end

      # @private
      # return corresponding Ruby encoding
      # @return [Encoding] encoding
      # @raise [RbMysql::ClientError] if the charset has no Ruby encoding
      def encoding
        enc = CHARSET_ENCODING[@name.downcase]
        raise RbMysql::ClientError, "unsupported charset: #{@name}" unless enc
        enc
      end

      # @private
      # convert encoding to corresponding to MySQL charset
      # Non-strings and ASCII-8BIT strings are returned unchanged.
      # @param [String] value
      # @return [String]
      def convert(value)
        if value.is_a?(String) && value.encoding != Encoding::ASCII_8BIT
          value = value.encode encoding
        end
        value
      end

    else
      # for Ruby 1.8: no Encoding support, so every helper is a pass-through.

      def self.to_binary(value)
        value
      end

      def self.convert_encoding(raw, encoding)
        raw
      end

      def encoding
        nil
      end

      def convert(value)
        value
      end

    end
  end
end
398