Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
seleniumhq
GitHub Repository: seleniumhq/selenium
Path: blob/trunk/third_party/closure/goog/i18n/uchar.js
2868 views
1
// Copyright 2009 The Closure Library Authors. All Rights Reserved.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
// http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS-IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
/**
16
* @fileoverview Collection of utility functions for Unicode characters.
17
*
18
*/
19
20
goog.provide('goog.i18n.uChar');
21
22
23
// Constants for handling Unicode supplementary characters (surrogate pairs).


/**
 * The minimum value for supplementary code points, i.e. the first code point
 * that needs a surrogate pair in UTF-16.
 * @type {number}
 * @private
 */
goog.i18n.uChar.SUPPLEMENTARY_CODE_POINT_MIN_VALUE_ = 0x10000;


/**
 * The highest Unicode code point value (scalar value) according to the Unicode
 * Standard.
 * @type {number}
 * @private
 */
goog.i18n.uChar.CODE_POINT_MAX_VALUE_ = 0x10FFFF;


/**
 * Lead surrogate minimum value.
 * @type {number}
 * @private
 */
goog.i18n.uChar.LEAD_SURROGATE_MIN_VALUE_ = 0xD800;


/**
 * Lead surrogate maximum value.
 * @type {number}
 * @private
 */
goog.i18n.uChar.LEAD_SURROGATE_MAX_VALUE_ = 0xDBFF;


/**
 * Trail surrogate minimum value.
 * @type {number}
 * @private
 */
goog.i18n.uChar.TRAIL_SURROGATE_MIN_VALUE_ = 0xDC00;


/**
 * Trail surrogate maximum value.
 * @type {number}
 * @private
 */
goog.i18n.uChar.TRAIL_SURROGATE_MAX_VALUE_ = 0xDFFF;


/**
 * The number of least significant bits of a supplementary code point that in
 * UTF-16 become the least significant bits of the trail surrogate. The rest of
 * the in-use bits of the supplementary code point become the least significant
 * bits of the lead surrogate.
 * @type {number}
 * @private
 */
goog.i18n.uChar.TRAIL_SURROGATE_BIT_COUNT_ = 10;
84
85
86
/**
 * Gets the U+ notation string of a Unicode character. Ex: 'U+0041' for 'A'.
 *
 * Only the character at the start of the string is considered (a surrogate
 * pair at the start counts as one character). The hexadecimal value is
 * upper-cased and zero-padded on the left to at least 4 digits; values that
 * need more digits are not truncated.
 *
 * @param {string} ch The given character.
 * @return {string} The U+ notation of the given character.
 */
goog.i18n.uChar.toHexString = function(ch) {
  var chCode = goog.i18n.uChar.toCharCode(ch);
  // The padded hex digits must be part of the same concatenation as the 'U+'
  // prefix; keep the operand on the continuation line directly after the '+'.
  var chCodeStr = 'U+' +
      goog.i18n.uChar.padString_(chCode.toString(16).toUpperCase(), 4, '0');

  return chCodeStr;
};
98
99
100
/**
 * Left-pads a string by repeatedly prepending a filler until the string is at
 * least the requested length.
 * @param {string} str The string to pad.
 * @param {number} length The minimum length of the returned string.
 * @param {string} ch The filler that is prepended on every step.
 * @return {string} The input string if it is already long enough, otherwise
 *     the left-padded string.
 * @private
 */
goog.i18n.uChar.padString_ = function(str, length, ch) {
  for (var padded = str; padded.length < length;) {
    padded = ch + padded;
  }
  return padded;
};
114
115
116
/**
 * Returns the Unicode value of the character that starts the given string.
 * @param {string} ch The character to inspect. For a supplementary character
 *     this is a surrogate pair (two UTF-16 code units). Anything after the
 *     first character is ignored.
 * @return {number} The Unicode value of the character.
 */
goog.i18n.uChar.toCharCode = function(ch) {
  // Always look at the very beginning of the string.
  var startIndex = 0;
  return goog.i18n.uChar.getCodePointAround(ch, startIndex);
};
126
127
128
/**
 * Gets a character from the given Unicode value. If the given code point is not
 * a valid Unicode code point, null is returned.
 *
 * Code points below the supplementary range yield a one-unit string;
 * supplementary code points yield a two-unit string holding the UTF-16
 * surrogate pair.
 *
 * @param {number} code The Unicode value of the character.
 * @return {?string} The character corresponding to the given Unicode value, or
 *     null if the value is null, undefined, or outside the range
 *     [0, CODE_POINT_MAX_VALUE_].
 */
goog.i18n.uChar.fromCharCode = function(code) {
  // The range test is written as a negated conjunction so that values for
  // which both comparisons are false (e.g. NaN) are rejected as well.
  if (!goog.isDefAndNotNull(code) ||
      !(code >= 0 && code <= goog.i18n.uChar.CODE_POINT_MAX_VALUE_)) {
    return null;
  }
  if (goog.i18n.uChar.isSupplementaryCodePoint(code)) {
    // First, we split the code point into the trail surrogate part (the
    // TRAIL_SURROGATE_BIT_COUNT_ least significant bits) and the lead surrogate
    // part (the rest of the bits, shifted down; note that for now this includes
    // the supplementary offset, also shifted down, to be subtracted off below).
    var leadBits = code >> goog.i18n.uChar.TRAIL_SURROGATE_BIT_COUNT_;
    // A bit-mask to get the TRAIL_SURROGATE_BIT_COUNT_ (i.e. 10) least
    // significant bits. 1 << 10 = 0x0400. 0x0400 - 1 = 0x03FF.
    var trailMask = (1 << goog.i18n.uChar.TRAIL_SURROGATE_BIT_COUNT_) - 1;
    var trailBits = code & trailMask;

    // Now we calculate the code point of each surrogate by adding each offset
    // to the corresponding base code point.
    // For the lead surrogate, also subtract off the supplementary offset,
    // which had been shifted down with the rest of leadBits. We do this here
    // instead of before the shift in order to save a separate subtraction
    // step.
    var leadCodePoint = leadBits +
        (goog.i18n.uChar.LEAD_SURROGATE_MIN_VALUE_ -
         (goog.i18n.uChar.SUPPLEMENTARY_CODE_POINT_MIN_VALUE_ >>
          goog.i18n.uChar.TRAIL_SURROGATE_BIT_COUNT_));
    var trailCodePoint = trailBits + goog.i18n.uChar.TRAIL_SURROGATE_MIN_VALUE_;

    // Convert the code points into a 2-character long string.
    return String.fromCharCode(leadCodePoint) +
        String.fromCharCode(trailCodePoint);
  }
  return String.fromCharCode(code);
};
167
168
169
/**
 * Returns the Unicode code point at the specified index.
 *
 * If the char value specified at the given index is in the leading-surrogate
 * range, and the following index is less than the length of {@code string}, and
 * the char value at the following index is in the trailing-surrogate range,
 * then the supplementary code point corresponding to this surrogate pair is
 * returned.
 *
 * If the char value specified at the given index is in the trailing-surrogate
 * range, and the preceding index is not before the start of {@code string}, and
 * the char value at the preceding index is in the leading-surrogate range, then
 * the negated supplementary code point corresponding to this surrogate pair is
 * returned.
 *
 * The negation allows the caller to differentiate between the case where the
 * given index is at the leading surrogate and the one where it is at the
 * trailing surrogate, and thus deduce where the next character starts and
 * preceding character ends.
 *
 * Otherwise, the char value at the given index is returned. Thus, a leading
 * surrogate is returned when it is not followed by a trailing surrogate, and a
 * trailing surrogate is returned when it is not preceded by a leading
 * surrogate.
 *
 * @param {string} string The string.
 * @param {number} index The index from which the code point is to be retrieved.
 * @return {number} The code point at the given index. If the given index is
 *     that of the start (i.e. lead surrogate) of a surrogate pair, returns the
 *     code point encoded by the pair. If the given index is that of the end
 *     (i.e. trail surrogate) of a surrogate pair, returns the negated code
 *     point encoded by the pair.
 */
goog.i18n.uChar.getCodePointAround = function(string, index) {
  var charCode = string.charCodeAt(index);
  if (goog.i18n.uChar.isLeadSurrogateCodePoint(charCode) &&
      index + 1 < string.length) {
    var trail = string.charCodeAt(index + 1);
    if (goog.i18n.uChar.isTrailSurrogateCodePoint(trail)) {
      // Part of a surrogate pair, positioned on its lead surrogate.
      return /** @type {number} */ (
          goog.i18n.uChar.buildSupplementaryCodePoint(charCode, trail));
    }
  } else if (goog.i18n.uChar.isTrailSurrogateCodePoint(charCode) && index > 0) {
    var lead = string.charCodeAt(index - 1);
    if (goog.i18n.uChar.isLeadSurrogateCodePoint(lead)) {
      // Part of a surrogate pair, positioned on its trail surrogate: negate
      // the result so the caller can tell the two positions apart.
      return /** @type {number} */ (
          -goog.i18n.uChar.buildSupplementaryCodePoint(lead, charCode));
    }
  }
  // Not part of a well-formed surrogate pair: return the code unit as is.
  return charCode;
};
222
223
224
/**
 * Reports how many string positions (UTF-16 code units) are needed to hold the
 * specified Unicode code point.
 * @param {number} codePoint The code point to measure.
 * @return {number} 2 if codePoint is a supplementary character, 1 otherwise.
 */
goog.i18n.uChar.charCount = function(codePoint) {
  if (goog.i18n.uChar.isSupplementaryCodePoint(codePoint)) {
    // Supplementary characters are stored as a surrogate pair.
    return 2;
  }
  return 1;
};
233
234
235
/**
 * Determines whether the specified Unicode code point is in the supplementary
 * Unicode characters range, i.e. between SUPPLEMENTARY_CODE_POINT_MIN_VALUE_
 * and CODE_POINT_MAX_VALUE_, both inclusive.
 * @param {number} codePoint The code point to test.
 * @return {boolean} Whether the given code point is a supplementary character.
 */
goog.i18n.uChar.isSupplementaryCodePoint = function(codePoint) {
  return codePoint >= goog.i18n.uChar.SUPPLEMENTARY_CODE_POINT_MIN_VALUE_ &&
      codePoint <= goog.i18n.uChar.CODE_POINT_MAX_VALUE_;
};
245
246
247
/**
 * Gets whether the given code point is a leading surrogate character, i.e.
 * lies between LEAD_SURROGATE_MIN_VALUE_ and LEAD_SURROGATE_MAX_VALUE_, both
 * inclusive.
 * @param {number} codePoint The code point to test.
 * @return {boolean} Whether the given code point is a leading surrogate
 *     character.
 */
goog.i18n.uChar.isLeadSurrogateCodePoint = function(codePoint) {
  return codePoint >= goog.i18n.uChar.LEAD_SURROGATE_MIN_VALUE_ &&
      codePoint <= goog.i18n.uChar.LEAD_SURROGATE_MAX_VALUE_;
};
257
258
259
/**
 * Gets whether the given code point is a trailing surrogate character, i.e.
 * lies between TRAIL_SURROGATE_MIN_VALUE_ and TRAIL_SURROGATE_MAX_VALUE_, both
 * inclusive.
 * @param {number} codePoint The code point to test.
 * @return {boolean} Whether the given code point is a trailing surrogate
 *     character.
 */
goog.i18n.uChar.isTrailSurrogateCodePoint = function(codePoint) {
  return codePoint >= goog.i18n.uChar.TRAIL_SURROGATE_MIN_VALUE_ &&
      codePoint <= goog.i18n.uChar.TRAIL_SURROGATE_MAX_VALUE_;
};
269
270
271
/**
 * Composes a supplementary Unicode code point from the given UTF-16 surrogate
 * pair. If {@code lead} isn't a leading surrogate code point or {@code trail}
 * isn't a trailing surrogate code point, null is returned.
 * @param {number} lead The leading surrogate code point.
 * @param {number} trail The trailing surrogate code point.
 * @return {?number} The supplementary Unicode code point obtained by decoding
 *     the given UTF-16 surrogate pair, or null if the pair is not valid.
 */
goog.i18n.uChar.buildSupplementaryCodePoint = function(lead, trail) {
  if (goog.i18n.uChar.isLeadSurrogateCodePoint(lead) &&
      goog.i18n.uChar.isTrailSurrogateCodePoint(trail)) {
    // The lead surrogate's offset from the start of the lead range supplies
    // the high bits of the result, so it is shifted up past the bits that the
    // trail surrogate contributes.
    var shiftedLeadOffset =
        (lead << goog.i18n.uChar.TRAIL_SURROGATE_BIT_COUNT_) -
        (goog.i18n.uChar.LEAD_SURROGATE_MIN_VALUE_
         << goog.i18n.uChar.TRAIL_SURROGATE_BIT_COUNT_);
    // The trail surrogate's offset supplies the low bits; the start of the
    // supplementary range is added back in here.
    var trailOffset = trail - goog.i18n.uChar.TRAIL_SURROGATE_MIN_VALUE_ +
        goog.i18n.uChar.SUPPLEMENTARY_CODE_POINT_MIN_VALUE_;
    return shiftedLeadOffset + trailOffset;
  }
  return null;
};
293
294