Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
seleniumhq
GitHub Repository: seleniumhq/selenium
Path: blob/trunk/third_party/closure/goog/i18n/graphemebreak.js
2868 views
1
// Copyright 2006 The Closure Library Authors. All Rights Reserved.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
// http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS-IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
/**
 * @fileoverview Detect Grapheme Cluster Break in a pair of codepoints. Follows
 * Unicode 5.1 UAX#29. Tailoring for Virama × Indic Consonants is used.
 */
20
21
goog.provide('goog.i18n.GraphemeBreak');
22
23
goog.require('goog.structs.InversionMap');
24
25
26
/**
 * Enum for all Grapheme Cluster Break properties.
 * These enum members correspond directly to the Grapheme_Cluster_Break
 * property values listed in http://unicode.org/reports/tr29 table 2. VIRAMA
 * and INDIC_CONSONANT are for the Virama × Base tailoring mentioned in the
 * notes of that report.
 *
 * CR and LF are moved towards the bottom of the list because they occur only
 * once and so are good candidates to take 2 decimal digit values.
 * @enum {number}
 * @protected
 */
goog.i18n.GraphemeBreak.property = {
  ANY: 0,
  CONTROL: 1,
  EXTEND: 2,
  PREPEND: 3,
  SPACING_MARK: 4,
  // Tailoring-only values (see the note above).
  INDIC_CONSONANT: 5,
  VIRAMA: 6,
  L: 7,
  V: 8,
  T: 9,
  // LV and LVT are the two values getBreakProp_ computes for code points in
  // 0xAC00..0xD7A3 instead of reading them from the inversion map.
  LV: 10,
  LVT: 11,
  CR: 12,
  LF: 13,
  REGIONAL_INDICATOR: 14
};
54
55
56
/**
57
* Grapheme Cluster Break property values for all codepoints as inversion map.
58
* Constructed lazily.
59
*
60
* @type {goog.structs.InversionMap}
61
* @private
62
*/
63
goog.i18n.GraphemeBreak.inversions_ = null;
64
65
66
/**
 * There are two kinds of grapheme clusters: 1) Legacy 2) Extended. This method
 * checks the legacy rules for a pair of adjacent characters, given their
 * Grapheme_Cluster_Break property values. The rules are tested in order and
 * the first one that matches decides the result; if none matches, there is a
 * break.
 *
 * Besides the CR/LF, control, Hangul, Extend and Virama rules, a pair of
 * regional indicators is kept together (UAX #29 "Regional_Indicator ×
 * Regional_Indicator"), so that a two-letter flag sequence is not split.
 *
 * @param {number} prop_a The property enum value of the first character.
 * @param {number} prop_b The property enum value of the second character.
 * @return {boolean} True if a & b do not form a cluster; False otherwise.
 * @private
 */
goog.i18n.GraphemeBreak.applyLegacyBreakRules_ = function(prop_a, prop_b) {

  var prop = goog.i18n.GraphemeBreak.property;

  // CR × LF: never split a CRLF pair. Must be tested before the control rules
  // below, which would otherwise report a break after CR.
  if (prop_a === prop.CR && prop_b === prop.LF) {
    return false;
  }
  // (Control | CR | LF) ÷ : always break after these.
  if (prop_a === prop.CONTROL || prop_a === prop.CR || prop_a === prop.LF) {
    return true;
  }
  // ÷ (Control | CR | LF): always break before these.
  if (prop_b === prop.CONTROL || prop_b === prop.CR || prop_b === prop.LF) {
    return true;
  }
  // L × (L | V | LV | LVT)
  if ((prop_a === prop.L) && (prop_b === prop.L || prop_b === prop.V ||
      prop_b === prop.LV || prop_b === prop.LVT)) {
    return false;
  }
  // (LV | V) × (V | T)
  if ((prop_a === prop.LV || prop_a === prop.V) &&
      (prop_b === prop.V || prop_b === prop.T)) {
    return false;
  }
  // (LVT | T) × T
  if ((prop_a === prop.LVT || prop_a === prop.T) && (prop_b === prop.T)) {
    return false;
  }
  // Regional_Indicator × Regional_Indicator: REGIONAL_INDICATOR is assigned
  // by the property data, so the pair must be recognised here; without this
  // rule two adjacent regional indicators fall through to "break".
  if (prop_a === prop.REGIONAL_INDICATOR &&
      prop_b === prop.REGIONAL_INDICATOR) {
    return false;
  }
  // × Extend, and × Virama (tailoring): never break before these.
  if (prop_b === prop.EXTEND || prop_b === prop.VIRAMA) {
    return false;
  }
  // Virama × Indic consonant (tailoring).
  if (prop_a === prop.VIRAMA && prop_b === prop.INDIC_CONSONANT) {
    return false;
  }
  // Any ÷ Any: everything else breaks.
  return true;
};
107
108
109
/**
 * Returns the Grapheme_Cluster_Break property enum value of a code point.
 * Code points in 0xAC00..0xD7A3 (Hangul LV / LVT) are classified
 * arithmetically; every other code point is looked up in the inversion map,
 * which is built on first use and cached in
 * goog.i18n.GraphemeBreak.inversions_.
 * @param {number} acode The code point value of the character.
 * @return {number} Property enum value of codepoint.
 * @private
 */
goog.i18n.GraphemeBreak.getBreakProp_ = function(acode) {
  var isComputedHangul = acode >= 0xAC00 && acode <= 0xD7A3;
  if (isComputedHangul) {
    var breakProps = goog.i18n.GraphemeBreak.property;
    // 0xAC00 % 0x1C is 0x10, so this is true exactly when the offset of acode
    // from 0xAC00 is a multiple of 0x1C; those code points are LV, the rest
    // of the range is LVT.
    return (acode % 0x1C == 0x10) ? breakProps.LV : breakProps.LVT;
  }

  var inversionMap = goog.i18n.GraphemeBreak.inversions_;
  if (!inversionMap) {
    // First lookup: build the map once and cache it on the namespace.
    // NOTE(review): the trailing `true` presumably tells InversionMap that the
    // first array holds deltas between range starts rather than absolute
    // values - confirm against goog.structs.InversionMap.
    inversionMap = new goog.structs.InversionMap(
        [
          0, 10, 1, 2, 1, 18, 95, 33, 13, 1, 594,
          112, 275, 7, 263, 45, 1, 1, 1, 2, 1, 2,
          1, 1, 56, 5, 11, 11, 48, 21, 16, 1, 101,
          7, 1, 1, 6, 2, 2, 1, 4, 33, 1, 1,
          1, 30, 27, 91, 11, 58, 9, 34, 4, 1, 9,
          1, 3, 1, 5, 43, 3, 136, 31, 1, 17, 37,
          1, 1, 1, 1, 3, 8, 4, 1, 2, 1, 7,
          8, 2, 2, 21, 8, 1, 2, 17, 39, 1, 1,
          1, 2, 6, 6, 1, 9, 5, 4, 2, 2, 12,
          2, 15, 2, 1, 17, 39, 2, 3, 12, 4, 8,
          6, 17, 2, 3, 14, 1, 17, 39, 1, 1, 3,
          8, 4, 1, 20, 2, 29, 1, 2, 17, 39, 1,
          1, 2, 1, 6, 6, 9, 6, 4, 2, 2, 13,
          1, 16, 1, 18, 41, 1, 1, 1, 12, 1, 9,
          1, 41, 3, 17, 37, 4, 3, 5, 7, 8, 3,
          2, 8, 2, 30, 2, 17, 39, 1, 1, 1, 1,
          2, 1, 3, 1, 5, 1, 8, 9, 1, 3, 2,
          30, 2, 17, 38, 3, 1, 2, 5, 7, 1, 9,
          1, 10, 2, 30, 2, 22, 48, 5, 1, 2, 6,
          7, 19, 2, 13, 46, 2, 1, 1, 1, 6, 1,
          12, 8, 50, 46, 2, 1, 1, 1, 9, 11, 6,
          14, 2, 58, 2, 27, 1, 1, 1, 1, 1, 4,
          2, 49, 14, 1, 4, 1, 1, 2, 5, 48, 9,
          1, 57, 33, 12, 4, 1, 6, 1, 2, 2, 2,
          1, 16, 2, 4, 2, 2, 4, 3, 1, 3, 2,
          7, 3, 4, 13, 1, 1, 1, 2, 6, 1, 1,
          14, 1, 98, 96, 72, 88, 349, 3, 931, 15, 2,
          1, 14, 15, 2, 1, 14, 15, 2, 15, 15, 14,
          35, 17, 2, 1, 7, 8, 1, 2, 9, 1, 1,
          9, 1, 45, 3, 155, 1, 87, 31, 3, 4, 2,
          9, 1, 6, 3, 20, 19, 29, 44, 9, 3, 2,
          1, 69, 23, 2, 3, 4, 45, 6, 2, 1, 1,
          1, 8, 1, 1, 1, 2, 8, 6, 13, 128, 4,
          1, 14, 33, 1, 1, 5, 1, 1, 5, 1, 1,
          1, 7, 31, 9, 12, 2, 1, 7, 23, 1, 4,
          2, 2, 2, 2, 2, 11, 3, 2, 36, 2, 1,
          1, 2, 3, 1, 1, 3, 2, 12, 36, 8, 8,
          2, 2, 21, 3, 128, 3, 1, 13, 1, 7, 4,
          1, 4, 2, 1, 203, 64, 523, 1, 2, 2, 24,
          7, 49, 16, 96, 33, 3070, 3, 141, 1, 96, 32,
          554, 6, 105, 2, 30164, 4, 1, 10, 33, 1, 80,
          2, 272, 1, 3, 1, 4, 1, 23, 2, 2, 1,
          24, 30, 4, 4, 3, 8, 1, 1, 13, 2, 16,
          34, 16, 1, 27, 18, 24, 24, 4, 8, 2, 23,
          11, 1, 1, 12, 32, 3, 1, 5, 3, 3, 36,
          1, 2, 4, 2, 1, 3, 1, 69, 35, 6, 2,
          2, 2, 2, 12, 1, 8, 1, 1, 18, 16, 1,
          3, 6, 1, 5, 48, 1, 1, 3, 2, 2, 5,
          2, 1, 1, 32, 9, 1, 2, 2, 5, 1, 1,
          201, 14, 2, 1, 1, 9, 8, 2, 1, 2, 1,
          2, 1, 1, 1, 18, 11184, 27, 49, 1028, 1024, 6942,
          1, 737, 16, 16, 7, 216, 1, 158, 2, 89, 3,
          513, 1, 2051, 15, 40, 7, 1, 1472, 1, 1, 1,
          53, 14, 1, 57, 2, 1, 45, 3, 4, 2, 1,
          1, 2, 1, 66, 3, 36, 5, 1, 6, 2, 75,
          2, 1, 48, 3, 9, 1, 1, 1258, 1, 1, 1,
          2, 6, 1, 1, 22681, 62, 4, 25042, 1, 1, 3,
          3, 1, 5, 8, 8, 2, 7, 30, 4, 148, 3,
          8097, 26, 790017, 255
        ],
        [
          1, 13, 1, 12, 1, 0, 1, 0, 1, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2,
          0, 2, 0, 1, 0, 2, 0, 2, 0, 2, 0, 2, 1, 0, 2, 0, 2, 0, 2, 0, 1,
          0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 4,
          0, 5, 2, 4, 2, 0, 4, 2, 4, 6, 4, 0, 2, 5, 0, 2, 0, 5, 2, 4, 0,
          5, 2, 0, 2, 4, 2, 4, 6, 0, 2, 5, 0, 2, 0, 5, 0, 2, 4, 0, 5, 2,
          4, 2, 6, 2, 5, 0, 2, 0, 2, 4, 0, 5, 2, 0, 4, 2, 4, 6, 0, 2, 0,
          2, 4, 0, 5, 2, 0, 2, 4, 2, 4, 6, 2, 5, 0, 2, 0, 5, 0, 2, 0, 5,
          2, 4, 2, 4, 6, 0, 2, 0, 4, 0, 5, 0, 2, 4, 2, 6, 2, 5, 0, 2, 0,
          4, 0, 5, 2, 0, 4, 2, 4, 2, 4, 2, 4, 2, 6, 2, 5, 0, 2, 0, 4, 0,
          5, 0, 2, 4, 2, 4, 6, 0, 2, 0, 2, 0, 4, 0, 5, 6, 2, 4, 2, 4, 2,
          4, 0, 5, 0, 2, 0, 4, 2, 6, 0, 2, 0, 5, 0, 2, 0, 4, 2, 0, 2, 0,
          5, 0, 2, 0, 2, 0, 2, 0, 2, 0, 4, 5, 2, 4, 2, 6, 0, 2, 0, 2, 0,
          2, 0, 5, 0, 2, 4, 2, 0, 6, 4, 2, 5, 0, 5, 0, 4, 2, 5, 2, 5, 0,
          5, 0, 5, 2, 5, 2, 0, 4, 2, 0, 2, 5, 0, 2, 0, 7, 8, 9, 0, 2, 0,
          5, 2, 6, 0, 5, 2, 6, 0, 5, 2, 0, 5, 2, 5, 0, 2, 4, 2, 4, 2, 4,
          2, 6, 2, 0, 2, 0, 2, 0, 2, 0, 5, 2, 4, 2, 4, 2, 4, 2, 0, 5, 0,
          5, 0, 4, 0, 4, 0, 5, 2, 4, 0, 5, 0, 5, 4, 2, 4, 2, 6, 0, 2, 0,
          2, 4, 2, 0, 2, 4, 0, 5, 2, 4, 2, 4, 2, 4, 2, 4, 6, 5, 0, 2, 0,
          2, 4, 0, 5, 4, 2, 4, 2, 6, 4, 5, 0, 5, 0, 5, 0, 2, 4, 2, 4, 2,
          4, 2, 6, 0, 5, 4, 2, 4, 2, 0, 5, 0, 2, 0, 2, 4, 2, 0, 2, 0, 4,
          2, 0, 2, 0, 1, 2, 1, 0, 1, 0, 1, 0, 2, 0, 2, 0, 6, 0, 2, 0, 2,
          0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 6, 5, 2, 5, 4, 2, 4, 0,
          5, 0, 5, 0, 5, 0, 5, 0, 4, 0, 5, 4, 6, 0, 2, 0, 5, 0, 2, 0, 5,
          2, 4, 6, 0, 7, 2, 4, 0, 5, 0, 5, 2, 4, 2, 4, 2, 4, 6, 0, 5, 2,
          4, 2, 4, 2, 0, 2, 0, 2, 4, 0, 5, 0, 5, 0, 5, 0, 5, 2, 0, 2, 0,
          2, 0, 2, 0, 2, 0, 5, 4, 2, 4, 0, 4, 6, 0, 5, 0, 5, 0, 5, 0, 4,
          2, 4, 2, 4, 0, 4, 6, 0, 11, 8, 9, 0, 2, 0, 2, 0, 2, 0, 2, 0, 1,
          0, 2, 0, 1, 0, 2, 0, 2, 0, 2, 6, 0, 4, 2, 4, 0, 2, 6, 0, 2, 4,
          0, 4, 2, 4, 6, 2, 0, 1, 0, 2, 0, 2, 4, 2, 6, 0, 2, 4, 0, 4, 2,
          4, 6, 0, 2, 4, 2, 4, 2, 6, 2, 0, 4, 2, 0, 2, 4, 2, 0, 4, 2, 1,
          2, 0, 2, 0, 2, 0, 2, 0, 14, 0, 1, 2
        ],
        true);
    goog.i18n.GraphemeBreak.inversions_ = inversionMap;
  }

  return /** @type {number} */ (inversionMap.at(acode));
};
226
227
228
/**
 * Tells whether there is a grapheme cluster break between two code points.
 * There are two kinds of grapheme clusters, legacy and extended; a boolean
 * flag selects which one is checked. The legacy rules always apply; in
 * extended mode a break they report is additionally suppressed after a
 * PREPEND character and before a SPACING_MARK character.
 * @param {number} a The code point value of the first character.
 * @param {number} b The code point value of the second character.
 * @param {boolean=} opt_extended If true, indicates extended grapheme cluster;
 *     If false, indicates legacy cluster.
 * @return {boolean} True if a & b do not form a cluster; False otherwise.
 */
goog.i18n.GraphemeBreak.hasGraphemeBreak = function(a, b, opt_extended) {
  var firstProp = goog.i18n.GraphemeBreak.getBreakProp_(a);
  var secondProp = goog.i18n.GraphemeBreak.getBreakProp_(b);
  var breakProps = goog.i18n.GraphemeBreak.property;

  // No break under the legacy rules means no break in either mode.
  if (!goog.i18n.GraphemeBreak.applyLegacyBreakRules_(firstProp, secondProp)) {
    return false;
  }

  // Legacy mode: the legacy verdict (a break) stands as is.
  if (!opt_extended) {
    return true;
  }

  // Extended mode: the break survives only if neither extra rule applies.
  return firstProp != breakProps.PREPEND &&
      secondProp != breakProps.SPACING_MARK;
};
247
248