Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
seleniumhq
GitHub Repository: seleniumhq/selenium
Path: blob/trunk/third_party/closure/goog/format/format.js
2868 views
1
// Copyright 2006 The Closure Library Authors. All Rights Reserved.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
// http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS-IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
/**
16
* @fileoverview Provides utility functions for formatting strings, numbers etc.
17
*
18
*/
19
20
goog.provide('goog.format');
21
22
goog.require('goog.i18n.GraphemeBreak');
23
goog.require('goog.string');
24
goog.require('goog.userAgent');
25
26
27
/**
 * Renders a byte count as a compact, human readable size such as 54, 450K,
 * 1.3M or 5G. Delegates to goog.format.numBytesToString with the 'B' suffix
 * switched off.
 * @param {number} bytes The byte count to render.
 * @param {number=} opt_decimals How many decimals to keep. Defaults to 2.
 * @return {string} The byte count in human readable form.
 */
goog.format.fileSize = function(bytes, opt_decimals) {
  var withByteSuffix = false;
  return goog.format.numBytesToString(bytes, opt_decimals, withByteSuffix);
};
37
38
39
/**
 * Tells whether a string holding a number with an optional scaling unit
 * (K, M, G, T, P, k, m, u, n) and an optional trailing 'B' can be turned into
 * a numeric value.
 *
 * If the number contains a decimal point, at least one digit must appear to
 * the left of it.
 *
 * Negative numbers are accepted.
 *
 * Examples:
 *   0, 1, 1.0, 10.4K, 2.3M, -0.3P, 1.2m
 *
 * @param {string} val The string to inspect.
 * @return {boolean} True if the string matches the scaled-number pattern.
 */
goog.format.isConvertableScaledNumber = function(val) {
  var scaledNumberPattern = goog.format.SCALED_NUMERIC_RE_;
  return scaledNumberPattern.test(val);
};
57
58
59
/**
 * Parses a scaled numeric string into a number, honoring its unit.
 * A string that ends in 'B' is treated as a byte count and converted with the
 * binary (powers of 1024) table; anything else uses the SI (powers of 1000)
 * table.
 * @param {string} stringValue The string to convert.
 * @return {number} The numeric value of the string.
 */
goog.format.stringToNumericValue = function(stringValue) {
  var scales = goog.string.endsWith(stringValue, 'B') ?
      goog.format.NUMERIC_SCALES_BINARY_ :
      goog.format.NUMERIC_SCALES_SI_;
  return goog.format.stringToNumericValue_(stringValue, scales);
};
73
74
75
/**
 * Parses a scaled numeric string into a byte count. The binary (powers of
 * 1024) table is always used, whether or not the string ends in 'B'.
 * @param {string} stringValue The string to convert.
 * @return {number} The number of bytes the string denotes.
 */
goog.format.stringToNumBytes = function(stringValue) {
  var binaryScales = goog.format.NUMERIC_SCALES_BINARY_;
  return goog.format.stringToNumericValue_(stringValue, binaryScales);
};
85
86
87
/**
 * Formats a number as a scaled string using the SI (powers of 1000) table.
 * @param {number} val The value to format.
 * @param {number=} opt_decimals How many decimals to keep. Defaults to 2.
 * @return {string} The scaled string form of the number.
 */
goog.format.numericValueToString = function(val, opt_decimals) {
  var siScales = goog.format.NUMERIC_SCALES_SI_;
  return goog.format.numericValueToString_(val, siScales, opt_decimals);
};
97
98
99
/**
 * Formats a byte count as a scaled string using the binary (powers of 1024)
 * table.
 * By default a 'B' is appended, but only when a scale symbol is used (e.g.
 * '10.5KB'), to reduce confusion with counts scaled by powers of 1000.
 * Unscaled values never get the suffix.
 * @param {number} val The byte count to format.
 * @param {number=} opt_decimals How many decimals to keep. Defaults to 2.
 * @param {boolean=} opt_suffix If true, add the trailing 'B' to scaled
 *     values. Default is true.
 * @param {boolean=} opt_useSeparator If true, a separator is placed between
 *     the number and the scale. Default is false.
 * @return {string} The scaled string form of the byte count.
 */
goog.format.numBytesToString = function(
    val, opt_decimals, opt_suffix, opt_useSeparator) {
  // An undefined opt_suffix counts as true; any falsy value that was actually
  // supplied switches the suffix off.
  var byteSuffix = (goog.isDef(opt_suffix) && !opt_suffix) ? '' : 'B';
  return goog.format.numericValueToString_(
      val, goog.format.NUMERIC_SCALES_BINARY_, opt_decimals, byteSuffix,
      opt_useSeparator);
};
122
123
124
/**
 * Parses a scaled numeric string and multiplies the number by the factor that
 * the given table assigns to its unit.
 * @param {string} stringValue The string to convert.
 * @param {Object} conversion Table mapping unit symbols to scale factors.
 * @return {number} The numeric value, or NaN when the string does not match
 *     the scaled-number pattern.
 * @private
 */
goog.format.stringToNumericValue_ = function(stringValue, conversion) {
  var parts = stringValue.match(goog.format.SCALED_NUMERIC_RE_);
  // parts[1] is the numeric text, parts[2] the (possibly empty) unit symbol.
  return parts ? Number(parts[1]) * conversion[parts[2]] : NaN;
};
140
141
142
/**
 * Formats a number as a scaled string using the supplied table of scale
 * factors. The first prefix (largest first) that fits the magnitude of the
 * value is chosen, the value is divided by its factor and rounded, and the
 * symbol is appended.
 * @param {number} val The value to format.
 * @param {Object} conversion Table mapping unit symbols to scale factors.
 * @param {number=} opt_decimals Maximum number of decimals. Default is 2.
 * @param {string=} opt_suffix Suffix appended after a non-empty scale symbol.
 * @param {boolean=} opt_useSeparator If true, the number and a non-empty
 *     scale symbol are separated by a space. Default is false.
 * @return {string} The human readable form of the value.
 * @private
 */
goog.format.numericValueToString_ = function(
    val, conversion, opt_decimals, opt_suffix, opt_useSeparator) {
  var units = goog.format.NUMERIC_SCALE_PREFIXES_;
  // The unit is picked by magnitude; the sign is kept for the final output.
  var magnitude = val < 0 ? -val : val;

  var symbol = '';
  for (var idx = 0; idx < units.length; idx++) {
    var candidate = units[idx];
    var factor = conversion[candidate];
    // Treat values less than 1 differently, allowing 0.5 to be "0.5" rather
    // than "500m".
    var fits = magnitude >= factor ||
        (factor <= 1 && magnitude > 0.1 * factor);
    if (fits) {
      symbol = candidate;
      break;
    }
  }

  // No match, or a match on the empty prefix, both mean "leave unscaled".
  var scale = symbol ? conversion[symbol] : 1;

  var separator = '';
  if (symbol) {
    if (opt_suffix) {
      symbol += opt_suffix;
    }
    if (opt_useSeparator) {
      separator = ' ';
    }
  }

  var decimals = goog.isDef(opt_decimals) ? opt_decimals : 2;
  var ex = Math.pow(10, decimals);
  return Math.round(val / scale * ex) / ex + separator + symbol;
};
187
188
189
/**
 * Regular expression for parsing a scaled numeric string, such as '10.4K' or
 * '-0.3P', when converting a string representation to a numeric value.
 *
 * Capture group 1 is the optionally negative number; a digit is required
 * before any decimal point. Capture group 2 is the optional one-letter scale
 * (K, M, G, T, P, k, m, u or n). 'k' is accepted as an alias of 'K'. The
 * scale may be used for SI (powers of 1000) or binary (powers of 1024)
 * conversions.
 *
 * A final 'B' is also allowed so that byte counts such as '10.2MB' match.
 *
 * The scale letters are listed without separators: inside a character class
 * a comma is a literal comma, so comma-separating them would make strings
 * like '1,' match even though no scale factor exists for ','.
 *
 * @type {RegExp}
 * @private
 */
goog.format.SCALED_NUMERIC_RE_ = /^(-?\d+\.?\d*)([KMGTPkmun]?)B?$/;
203
204
205
/**
 * Scale prefixes ordered from the largest factor to the smallest. The empty
 * string stands for the unscaled value.
 * @private {Array<string>}
 */
goog.format.NUMERIC_SCALE_PREFIXES_ = [
  'P', 'T', 'G', 'M', 'K',  // multiples
  '',                       // unscaled
  'm', 'u', 'n'             // fractions
];
211
212
213
/**
 * SI scale factors (powers of 1000), keyed by unit symbol. Both 'k' and 'K'
 * map to 1000.
 * @type {Object}
 * @private
 */
goog.format.NUMERIC_SCALES_SI_ = {
  // Unscaled.
  '': 1,
  // Fractions.
  'n': 1e-9,
  'u': 1e-6,
  'm': 1e-3,
  // Multiples.
  'k': 1e3,
  'K': 1e3,
  'M': 1e6,
  'G': 1e9,
  'T': 1e12,
  'P': 1e15
};
230
231
232
/**
 * Binary scale factors (powers of 1024), keyed by unit symbol. Both 'k' and
 * 'K' map to 1024.
 * @type {Object}
 * @private
 */
goog.format.NUMERIC_SCALES_BINARY_ = {
  // Unscaled.
  '': 1,
  // Fractions.
  'n': Math.pow(1024, -3),
  'u': Math.pow(1024, -2),
  'm': 1 / 1024,
  // Multiples.
  'k': 1024,
  'K': 1024,
  'M': Math.pow(1024, 2),
  'G': Math.pow(1024, 3),
  'T': Math.pow(1024, 4),
  'P': Math.pow(1024, 5)
};
250
251
252
/**
 * First Unicode code point that has the Mark property. Characters at or
 * above this value are checked with the grapheme break detector before a
 * word break is placed in front of them.
 * @type {number}
 * @private
 */
goog.format.FIRST_GRAPHEME_EXTEND_ = 0x300;
258
259
260
/**
 * Decides whether a character counts as a breaking space. That is the case
 * for every char code up to and including the ASCII space (so all ASCII
 * control characters), for U+2000 through U+200B except U+2007, and for
 * U+1680, U+180E, U+2028, U+2029, U+205F and U+3000.
 * @param {number} charCode The character code under consideration.
 * @return {boolean} True if the character is a breaking space.
 * @private
 */
goog.format.isTreatedAsBreakingSpace_ = function(charCode) {
  if (charCode <= goog.format.WbrToken_.SPACE) {
    return true;
  }
  // None of the Unicode spaces handled below lie under U+1000, so anything
  // smaller can be rejected without further comparisons.
  if (charCode < 0x1000) {
    return false;
  }
  var inMainSpaceRange = (charCode >= 0x2000 && charCode <= 0x2006) ||
      (charCode >= 0x2008 && charCode <= 0x200B);
  var isOtherSpace = charCode == 0x1680 || charCode == 0x180E ||
      charCode == 0x2028 || charCode == 0x2029 || charCode == 0x205f ||
      charCode == 0x3000;
  return inMainSpaceRange || isOtherSpace;
};
277
278
279
/**
 * Decides whether a character is an invisible formatting character, i.e. one
 * in U+200C..U+200F or U+202A..U+202E.
 * @param {number} charCode The character code under consideration.
 * @return {boolean} True if the character is an invisible formatting
 *     character.
 * @private
 */
goog.format.isInvisibleFormattingCharacter_ = function(charCode) {
  // See: http://unicode.org/charts/PDF/U2000.pdf
  var inFirstRange = charCode >= 0x200C && charCode <= 0x200F;
  var inSecondRange = charCode >= 0x202A && charCode <= 0x202E;
  return inFirstRange || inSecondRange;
};
291
292
293
/**
 * Inserts word breaks into an HTML string at a given interval. The counter is
 * reset if a space or a character which behaves like a space is encountered,
 * but it isn't incremented if an invisible formatting character is
 * encountered. WBRs aren't inserted into HTML tags or entities. Entities
 * count towards the character count as one character, HTML tags do not.
 *
 * The input is scanned by char code and copied to the output in slices, one
 * per inserted break plus a final remainder, so no per-character strings are
 * created.
 *
 * @param {string} str HTML to insert word breaks into.
 * @param {function(number, number, boolean): boolean} hasGraphemeBreak A
 *     function determining if there is a grapheme break between two
 *     characters, in the same signature as
 *     goog.i18n.GraphemeBreak.hasGraphemeBreak. It is only consulted for
 *     characters at or above goog.format.FIRST_GRAPHEME_EXTEND_.
 * @param {number=} opt_maxlen Maximum length after which to ensure
 *     there is a break. Default is 10 characters; a falsy value (including
 *     0) also selects the default.
 * @return {string} The string including word breaks. The input is returned
 *     unchanged when it is shorter than the maximum length.
 * @private
 */
goog.format.insertWordBreaksGeneric_ = function(
    str, hasGraphemeBreak, opt_maxlen) {
  var maxlen = opt_maxlen || 10;
  // A string shorter than maxlen can never need a break.
  if (maxlen > str.length) return str;

  var rv = [];
  var n = 0;  // The length of the current token

  // This will contain the ampersand or less-than character if one of the
  // two has been seen; otherwise, the value is zero.
  var nestingCharCode = 0;

  // First character position from input string that has not been outputted.
  var lastDumpPosition = 0;

  var charCode = 0;
  for (var i = 0; i < str.length; i++) {
    // Using charCodeAt versus charAt avoids allocating new string objects.
    var lastCharCode = charCode;
    charCode = str.charCodeAt(i);

    // Don't add a WBR before characters that might be grapheme extending.
    var isPotentiallyGraphemeExtending =
        charCode >= goog.format.FIRST_GRAPHEME_EXTEND_ &&
        !hasGraphemeBreak(lastCharCode, charCode, true);

    // Don't add a WBR at the end of a word. For the purposes of determining
    // word breaks, all ASCII control characters and some commonly encountered
    // Unicode spacing characters are treated as breaking spaces.
    if (n >= maxlen && !goog.format.isTreatedAsBreakingSpace_(charCode) &&
        !isPotentiallyGraphemeExtending) {
      // Flush everything seen so far, and append a word break.
      rv.push(str.substring(lastDumpPosition, i), goog.format.WORD_BREAK_HTML);
      lastDumpPosition = i;
      n = 0;
    }

    if (!nestingCharCode) {
      // Not currently within an HTML tag or entity

      if (charCode == goog.format.WbrToken_.LT ||
          charCode == goog.format.WbrToken_.AMP) {
        // Entering an HTML Entity '&' or open tag '<'
        nestingCharCode = charCode;
      } else if (goog.format.isTreatedAsBreakingSpace_(charCode)) {
        // A space or control character -- reset the token length
        n = 0;
      } else if (!goog.format.isInvisibleFormattingCharacter_(charCode)) {
        // A normal flow character - increment. For grapheme extending
        // characters, this is not *technically* a new character. However,
        // since the grapheme break detector might be overly conservative,
        // we have to continue incrementing, or else we won't even be able
        // to add breaks when we get to things like punctuation. For the
        // case where we have a full grapheme break detector, it is okay if
        // we occasionally break slightly early.
        n++;
      }
    } else if (
        charCode == goog.format.WbrToken_.GT &&
        nestingCharCode == goog.format.WbrToken_.LT) {
      // Leaving an HTML tag, treat the tag as zero-length
      nestingCharCode = 0;
    } else if (
        charCode == goog.format.WbrToken_.SEMI_COLON &&
        nestingCharCode == goog.format.WbrToken_.AMP) {
      // Leaving an HTML entity, treat it as length one
      nestingCharCode = 0;
      n++;
    }
  }

  // Take care of anything we haven't flushed so far. substring() is used
  // instead of the legacy substr(); with a non-negative start index and no
  // second argument the two return the same tail of the string.
  rv.push(str.substring(lastDumpPosition));

  return rv.join('');
};
389
390
391
/**
 * Inserts word breaks into an HTML string at a given interval.
 *
 * This variant is as aggressive as possible: it relies on the full table of
 * Unicode characters where a word break is legal. That table comes at a 2.5k
 * pre-gzip (~1k post-gzip) size cost. Consider using insertWordBreaksBasic
 * to minimize the size impact.
 *
 * @param {string} str HTML to insert word breaks into.
 * @param {number=} opt_maxlen Maximum length after which to ensure there is a
 *     break. Default is 10 characters.
 * @return {string} The string including word breaks.
 * @deprecated Prefer wrapping with CSS word-wrap: break-word.
 */
goog.format.insertWordBreaks = function(str, opt_maxlen) {
  var fullBreakDetector = goog.i18n.GraphemeBreak.hasGraphemeBreak;
  return goog.format.insertWordBreaksGeneric_(
      str, fullBreakDetector, opt_maxlen);
};
409
410
411
/**
 * Conservative stand-in for a grapheme break detector.
 *
 * Follows a signature similar to goog.i18n.GraphemeBreak, but errs on the
 * side of caution: it reports a break only for characters in common scripts
 * that are simple to account for.
 *
 * @param {number} lastCharCode The previous character code. Ignored.
 * @param {number} charCode The character code under consideration. It must be
 *     at least \u0300 as a precondition -- this case is covered by
 *     insertWordBreaksGeneric_.
 * @param {boolean=} opt_extended Ignored, to conform with the interface.
 * @return {boolean} Whether it is one of the recognized subsets of characters
 *     with a grapheme break.
 * @private
 */
goog.format.conservativelyHasGraphemeBreak_ = function(
    lastCharCode, charCode, opt_extended) {
  // Only the most common Cyrillic characters are recognized; everything else
  // yields false. Latin characters need no handling here, because
  // insertWordBreaksGeneric_ itself already takes care of those.
  // TODO(gboyer): Also account for Greek, Armenian, and Georgian if it is
  // simple to do so.
  var rangeStart = 0x400;
  var rangeEndExclusive = 0x523;
  return rangeStart <= charCode && charCode < rangeEndExclusive;
};
436
437
438
// TODO(gboyer): Consider using a compile-time flag to switch implementations
439
// rather than relying on the developers to toggle implementations.
440
/**
 * Inserts word breaks into an HTML string at a given interval.
 *
 * This variant is less aggressive than insertWordBreaks: breaks are only
 * inserted next to punctuation and between Latin or Cyrillic characters.
 * That is good enough for the common case of URLs. It also works for all
 * Latin and Cyrillic languages, plus CJK has no need for word breaks. When
 * this method is used, goog.i18n.GraphemeBreak may be dead code eliminated.
 *
 * @param {string} str HTML to insert word breaks into.
 * @param {number=} opt_maxlen Maximum length after which to ensure there is a
 *     break. Default is 10 characters.
 * @return {string} The string including word breaks.
 * @deprecated Prefer wrapping with CSS word-wrap: break-word.
 */
goog.format.insertWordBreaksBasic = function(str, opt_maxlen) {
  var basicBreakDetector = goog.format.conservativelyHasGraphemeBreak_;
  return goog.format.insertWordBreaksGeneric_(
      str, basicBreakDetector, opt_maxlen);
};
460
461
462
/**
 * Whether the current user agent is IE at version 8 or higher. The version
 * check only runs when the user agent is IE.
 * @type {boolean}
 * @private
 */
goog.format.IS_IE8_OR_ABOVE_ =
    goog.userAgent.IE && goog.userAgent.isVersionOrHigher(8);
469
470
471
/**
 * The HTML that insertWordBreaks inserts as a word break, chosen once per
 * user agent:
 *   - WebKit (Safari requires it): <wbr></wbr>
 *   - Opera: the &shy; entity, though this will give a visible hyphen at
 *     breaks
 *   - IE8 and above: a zero width space
 *   - any other browser: <wbr>
 * @type {string}
 */
goog.format.WORD_BREAK_HTML =
    goog.userAgent.WEBKIT ? '<wbr></wbr>' :
    goog.userAgent.OPERA ? '&shy;' :
    goog.format.IS_IE8_OR_ABOVE_ ? '&#8203;' :
    '<wbr>';
482
483
484
/**
 * Character codes of the tokens that insertWordBreaks reacts to.
 * @private
 * @enum {number}
 */
goog.format.WbrToken_ = {
  // Tag delimiters.
  LT: 60,  // '<'.charCodeAt(0)
  GT: 62,  // '>'.charCodeAt(0)
  // Entity delimiters.
  AMP: 38,         // '&'.charCodeAt(0)
  SEMI_COLON: 59,  // ';'.charCodeAt(0)
  // Upper bound of the ASCII range treated as breaking space.
  SPACE: 32  // ' '.charCodeAt(0)
};
496
497