// Path: blob/trunk/third_party/closure/goog/i18n/uchar.js
// 2868 views
// Copyright 2009 The Closure Library Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS-IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * @fileoverview Collection of utility functions for Unicode characters.
 *
 */

goog.provide('goog.i18n.uChar');


// Constants for handling Unicode supplementary characters (surrogate pairs).


/**
 * The minimum value for Supplementary code points.
 * @type {number}
 * @private
 */
goog.i18n.uChar.SUPPLEMENTARY_CODE_POINT_MIN_VALUE_ = 0x10000;


/**
 * The highest Unicode code point value (scalar value) according to the Unicode
 * Standard.
 * @type {number}
 * @private
 */
goog.i18n.uChar.CODE_POINT_MAX_VALUE_ = 0x10FFFF;


/**
 * Lead surrogate minimum value.
 * @type {number}
 * @private
 */
goog.i18n.uChar.LEAD_SURROGATE_MIN_VALUE_ = 0xD800;


/**
 * Lead surrogate maximum value.
 * @type {number}
 * @private
 */
goog.i18n.uChar.LEAD_SURROGATE_MAX_VALUE_ = 0xDBFF;


/**
 * Trail surrogate minimum value.
 * @type {number}
 * @private
 */
goog.i18n.uChar.TRAIL_SURROGATE_MIN_VALUE_ = 0xDC00;


/**
 * Trail surrogate maximum value.
 * @type {number}
 * @private
 */
goog.i18n.uChar.TRAIL_SURROGATE_MAX_VALUE_ = 0xDFFF;


/**
 * The number of least significant bits of a supplementary code point that in
 * UTF-16 become the least significant bits of the trail surrogate. The rest of
 * the in-use bits of the supplementary code point become the least significant
 * bits of the lead surrogate.
 * @type {number}
 * @private
 */
goog.i18n.uChar.TRAIL_SURROGATE_BIT_COUNT_ = 10;


/**
 * Gets the U+ notation string of a Unicode character. Ex: 'U+0041' for 'A'.
 * The hexadecimal part is upper-cased and left-padded with '0' to at least
 * four digits; supplementary characters produce more than four digits.
 * @param {string} ch The given character.
 * @return {string} The U+ notation of the given character.
 */
goog.i18n.uChar.toHexString = function(ch) {
  var chCode = goog.i18n.uChar.toCharCode(ch);
  var chCodeStr = 'U+' +
      goog.i18n.uChar.padString_(chCode.toString(16).toUpperCase(), 4, '0');

  return chCodeStr;
};


/**
 * Gets a string padded with given character to get given size. The padding is
 * prepended, i.e. the string is padded on the left. A string that is already
 * at least {@code length} long is returned unchanged.
 * @param {string} str The given string to be padded.
 * @param {number} length The target size of the string.
 * @param {string} ch The character to be padded with.
 * @return {string} The padded string.
 * @private
 */
goog.i18n.uChar.padString_ = function(str, length, ch) {
  while (str.length < length) {
    str = ch + str;
  }
  return str;
};


/**
 * Gets Unicode value of the given character.
 * @param {string} ch The given character, which in the case of a supplementary
 *     character is actually a surrogate pair. The remainder of the string is
 *     ignored.
 * @return {number} The Unicode value of the character.
 */
goog.i18n.uChar.toCharCode = function(ch) {
  // Index 0 can never be the trailing half of a pair inside this string, so
  // the value returned here is never the negated form.
  return goog.i18n.uChar.getCodePointAround(ch, 0);
};


/**
 * Gets a character from the given Unicode value. If the given code point is not
 * a valid Unicode code point, null is returned.
 * @param {number} code The Unicode value of the character.
 * @return {?string} The character corresponding to the given Unicode value. For
 *     a supplementary code point this is a 2-character string holding the
 *     surrogate pair.
 */
goog.i18n.uChar.fromCharCode = function(code) {
  // The explicit null/undefined test is required: null would otherwise coerce
  // to 0 in the range comparison and be accepted. The negated range test also
  // rejects NaN.
  if (code == null ||
      !(code >= 0 && code <= goog.i18n.uChar.CODE_POINT_MAX_VALUE_)) {
    return null;
  }
  if (goog.i18n.uChar.isSupplementaryCodePoint(code)) {
    // First, we split the code point into the trail surrogate part (the
    // TRAIL_SURROGATE_BIT_COUNT_ least significant bits) and the lead surrogate
    // part (the rest of the bits, shifted down; note that for now this includes
    // the supplementary offset, also shifted down, to be subtracted off below).
    var leadBits = code >> goog.i18n.uChar.TRAIL_SURROGATE_BIT_COUNT_;
    var trailBits = code &
        // A bit-mask to get the TRAIL_SURROGATE_BIT_COUNT_ (i.e. 10) least
        // significant bits. 1 << 10 = 0x0400. 0x0400 - 1 = 0x03FF.
        ((1 << goog.i18n.uChar.TRAIL_SURROGATE_BIT_COUNT_) - 1);

    // Now we calculate the code point of each surrogate by adding each offset
    // to the corresponding base code point.
    var leadCodePoint = leadBits +
        (goog.i18n.uChar.LEAD_SURROGATE_MIN_VALUE_ -
         // Subtract off the supplementary offset, which had been shifted down
         // with the rest of leadBits. We do this here instead of before the
         // shift in order to save a separate subtraction step.
         (goog.i18n.uChar.SUPPLEMENTARY_CODE_POINT_MIN_VALUE_ >>
          goog.i18n.uChar.TRAIL_SURROGATE_BIT_COUNT_));
    var trailCodePoint = trailBits + goog.i18n.uChar.TRAIL_SURROGATE_MIN_VALUE_;

    // Convert the code points into a 2-character long string.
    return String.fromCharCode(leadCodePoint) +
        String.fromCharCode(trailCodePoint);
  }
  return String.fromCharCode(code);
};


/**
 * Returns the Unicode code point at the specified index.
 *
 * If the char value specified at the given index is in the leading-surrogate
 * range, and the following index is less than the length of {@code string}, and
 * the char value at the following index is in the trailing-surrogate range,
 * then the supplementary code point corresponding to this surrogate pair is
 * returned.
 *
 * If the char value specified at the given index is in the trailing-surrogate
 * range, and the preceding index is not before the start of {@code string}, and
 * the char value at the preceding index is in the leading-surrogate range, then
 * the negated supplementary code point corresponding to this surrogate pair is
 * returned.
 *
 * The negation allows the caller to differentiate between the case where the
 * given index is at the leading surrogate and the one where it is at the
 * trailing surrogate, and thus deduce where the next character starts and
 * preceding character ends.
 *
 * Otherwise, the char value at the given index is returned. Thus, a leading
 * surrogate is returned when it is not followed by a trailing surrogate, and a
 * trailing surrogate is returned when it is not preceded by a leading
 * surrogate. If the index is outside the string, the NaN produced by
 * {@code charCodeAt} is returned unchanged.
 *
 * @param {string} string The string.
 * @param {number} index The index from which the code point is to be retrieved.
 * @return {number} The code point at the given index. If the given index is
 *     that of the start (i.e. lead surrogate) of a surrogate pair, returns the
 *     code point encoded by the pair. If the given index is that of the end
 *     (i.e. trail surrogate) of a surrogate pair, returns the negated code
 *     point encoded by the pair.
 */
goog.i18n.uChar.getCodePointAround = function(string, index) {
  var charCode = string.charCodeAt(index);
  if (goog.i18n.uChar.isLeadSurrogateCodePoint(charCode) &&
      index + 1 < string.length) {
    var trail = string.charCodeAt(index + 1);
    if (goog.i18n.uChar.isTrailSurrogateCodePoint(trail)) {
      // Part of a surrogate pair.
      return /** @type {number} */ (
          goog.i18n.uChar.buildSupplementaryCodePoint(charCode, trail));
    }
  } else if (goog.i18n.uChar.isTrailSurrogateCodePoint(charCode) && index > 0) {
    var lead = string.charCodeAt(index - 1);
    if (goog.i18n.uChar.isLeadSurrogateCodePoint(lead)) {
      // Part of a surrogate pair.
      return /** @type {number} */ (
          -goog.i18n.uChar.buildSupplementaryCodePoint(lead, charCode));
    }
  }
  // Not part of a well-formed pair: hand back the raw UTF-16 code unit.
  return charCode;
};


/**
 * Determines the length of the string needed to represent the specified
 * Unicode code point.
 * @param {number} codePoint
 * @return {number} 2 if codePoint is a supplementary character, 1 otherwise.
 */
goog.i18n.uChar.charCount = function(codePoint) {
  return goog.i18n.uChar.isSupplementaryCodePoint(codePoint) ? 2 : 1;
};


/**
 * Determines whether the specified Unicode code point is in the supplementary
 * Unicode characters range.
 * @param {number} codePoint
 * @return {boolean} Whether the given code point is a supplementary character.
 */
goog.i18n.uChar.isSupplementaryCodePoint = function(codePoint) {
  return codePoint >= goog.i18n.uChar.SUPPLEMENTARY_CODE_POINT_MIN_VALUE_ &&
      codePoint <= goog.i18n.uChar.CODE_POINT_MAX_VALUE_;
};


/**
 * Gets whether the given code point is a leading surrogate character.
 * @param {number} codePoint
 * @return {boolean} Whether the given code point is a leading surrogate
 *     character.
 */
goog.i18n.uChar.isLeadSurrogateCodePoint = function(codePoint) {
  return codePoint >= goog.i18n.uChar.LEAD_SURROGATE_MIN_VALUE_ &&
      codePoint <= goog.i18n.uChar.LEAD_SURROGATE_MAX_VALUE_;
};


/**
 * Gets whether the given code point is a trailing surrogate character.
 * @param {number} codePoint
 * @return {boolean} Whether the given code point is a trailing surrogate
 *     character.
 */
goog.i18n.uChar.isTrailSurrogateCodePoint = function(codePoint) {
  return codePoint >= goog.i18n.uChar.TRAIL_SURROGATE_MIN_VALUE_ &&
      codePoint <= goog.i18n.uChar.TRAIL_SURROGATE_MAX_VALUE_;
};


/**
 * Composes a supplementary Unicode code point from the given UTF-16 surrogate
 * pair. If {@code lead} isn't a leading surrogate code point or {@code trail}
 * isn't a trailing surrogate code point, null is returned.
 * @param {number} lead The leading surrogate code point.
 * @param {number} trail The trailing surrogate code point.
 * @return {?number} The supplementary Unicode code point obtained by decoding
 *     the given UTF-16 surrogate pair.
 */
goog.i18n.uChar.buildSupplementaryCodePoint = function(lead, trail) {
  if (goog.i18n.uChar.isLeadSurrogateCodePoint(lead) &&
      goog.i18n.uChar.isTrailSurrogateCodePoint(trail)) {
    // The lead surrogate's offset from its base supplies the high bits of the
    // result, so it is shifted up past the bits the trail surrogate supplies.
    var shiftedLeadOffset =
        (lead << goog.i18n.uChar.TRAIL_SURROGATE_BIT_COUNT_) -
        (goog.i18n.uChar.LEAD_SURROGATE_MIN_VALUE_
         << goog.i18n.uChar.TRAIL_SURROGATE_BIT_COUNT_);
    // The trail surrogate's offset supplies the low bits; the supplementary
    // base (0x10000) is folded in here to save a separate addition.
    var trailOffset = trail - goog.i18n.uChar.TRAIL_SURROGATE_MIN_VALUE_ +
        goog.i18n.uChar.SUPPLEMENTARY_CODE_POINT_MIN_VALUE_;
    return shiftedLeadOffset + trailOffset;
  }
  return null;
};