// Path: blob/trunk/third_party/closure/goog/i18n/graphemebreak.js
// 2868 views
// Copyright 2006 The Closure Library Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS-IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * @fileoverview Detect Grapheme Cluster Break in a pair of codepoints. Follows
 * Unicode 5.1 UAX#29. Tailoring for Virama × Indic Consonants is used.
 *
 */

goog.provide('goog.i18n.GraphemeBreak');

goog.require('goog.structs.InversionMap');


/**
 * Enum for all Grapheme Cluster Break properties.
 * These enums directly correspond to the Grapheme_Cluster_Break property
 * values mentioned in http://unicode.org/reports/tr29 table 2. VIRAMA and
 * INDIC_CONSONANT are for the Virama × Base tailoring mentioned in the notes.
 *
 * CR and LF are moved to the bottom of the list because they occur only once
 * and so are good candidates to take 2 decimal digit values.
 * @enum {number}
 * @protected
 */
goog.i18n.GraphemeBreak.property = {
  ANY: 0,
  CONTROL: 1,
  EXTEND: 2,
  PREPEND: 3,
  SPACING_MARK: 4,
  INDIC_CONSONANT: 5,
  VIRAMA: 6,
  L: 7,
  V: 8,
  T: 9,
  LV: 10,
  LVT: 11,
  CR: 12,
  LF: 13,
  REGIONAL_INDICATOR: 14
};


/**
 * Grapheme Cluster Break property values for all codepoints as inversion map.
 * Constructed lazily.
 *
 * @type {goog.structs.InversionMap}
 * @private
 */
goog.i18n.GraphemeBreak.inversions_ = null;


/**
 * There are two kinds of grapheme clusters: 1) Legacy 2) Extended. This method
 * is to check for legacy rules.
 *
 * The checks are ordered: the first rule that matches decides the result, so
 * the CR x LF exception must be tested before the general "always break
 * around controls" rules.
 *
 * @param {number} prop_a The property enum value of the first character.
 * @param {number} prop_b The property enum value of the second character.
 * @return {boolean} True if a & b do not form a cluster; False otherwise.
 * @private
 */
goog.i18n.GraphemeBreak.applyLegacyBreakRules_ = function(prop_a, prop_b) {

  var prop = goog.i18n.GraphemeBreak.property;

  // CR LF is never split.
  if (prop_a == prop.CR && prop_b == prop.LF) {
    return false;
  }
  // Otherwise always break after a control, CR or LF ...
  if (prop_a == prop.CONTROL || prop_a == prop.CR || prop_a == prop.LF) {
    return true;
  }
  // ... and before one.
  if (prop_b == prop.CONTROL || prop_b == prop.CR || prop_b == prop.LF) {
    return true;
  }
  // Hangul syllable sequences: L x (L | V | LV | LVT).
  if ((prop_a == prop.L) &&
      (prop_b == prop.L || prop_b == prop.V ||
       prop_b == prop.LV || prop_b == prop.LVT)) {
    return false;
  }
  // (LV | V) x (V | T).
  if ((prop_a == prop.LV || prop_a == prop.V) &&
      (prop_b == prop.V || prop_b == prop.T)) {
    return false;
  }
  // (LVT | T) x T.
  if ((prop_a == prop.LVT || prop_a == prop.T) && (prop_b == prop.T)) {
    return false;
  }
  // Never break before an extending character; VIRAMA is treated the same way
  // here.
  if (prop_b == prop.EXTEND || prop_b == prop.VIRAMA) {
    return false;
  }
  // Tailoring: Virama x Indic consonant stays together.
  if (prop_a == prop.VIRAMA && prop_b == prop.INDIC_CONSONANT) {
    return false;
  }
  // NOTE(review): REGIONAL_INDICATOR is present in the enum and in the data
  // below but no rule here pairs two regional indicators -- confirm whether
  // that is intentional.
  return true;
};


/**
 * Method to return property enum value of the codepoint. If it is Hangul LV or
 * LVT, then it is computed; for the rest it is picked from the inversion map.
 * @param {number} acode The code point value of the character.
 * @return {number} Property enum value of codepoint.
 * @private
 */
goog.i18n.GraphemeBreak.getBreakProp_ = function(acode) {
  if (0xAC00 <= acode && acode <= 0xD7A3) {
    var prop = goog.i18n.GraphemeBreak.property;
    // Precomposed Hangul syllables come in groups of 0x1C (28): the first of
    // each group is an LV syllable, the other 27 are LVT. Since
    // 0xAC00 % 0x1C == 0x10, the group leaders are exactly the code points
    // with remainder 0x10.
    if (acode % 0x1C == 0x10) {
      return prop.LV;
    }
    return prop.LVT;
  } else {
    if (!goog.i18n.GraphemeBreak.inversions_) {
      // Built once on first use and cached. The first array holds range
      // boundaries and the second the property value of each range; both have
      // the same number of entries. The trailing `true` is presumably
      // InversionMap's delta flag (boundaries given as increments from the
      // previous one) -- see goog.structs.InversionMap.
      goog.i18n.GraphemeBreak.inversions_ = new goog.structs.InversionMap(
          [
            0, 10, 1, 2, 1, 18, 95, 33, 13, 1, 594,
            112, 275, 7, 263, 45, 1, 1, 1, 2, 1, 2,
            1, 1, 56, 5, 11, 11, 48, 21, 16, 1, 101,
            7, 1, 1, 6, 2, 2, 1, 4, 33, 1, 1,
            1, 30, 27, 91, 11, 58, 9, 34, 4, 1, 9,
            1, 3, 1, 5, 43, 3, 136, 31, 1, 17, 37,
            1, 1, 1, 1, 3, 8, 4, 1, 2, 1, 7,
            8, 2, 2, 21, 8, 1, 2, 17, 39, 1, 1,
            1, 2, 6, 6, 1, 9, 5, 4, 2, 2, 12,
            2, 15, 2, 1, 17, 39, 2, 3, 12, 4, 8,
            6, 17, 2, 3, 14, 1, 17, 39, 1, 1, 3,
            8, 4, 1, 20, 2, 29, 1, 2, 17, 39, 1,
            1, 2, 1, 6, 6, 9, 6, 4, 2, 2, 13,
            1, 16, 1, 18, 41, 1, 1, 1, 12, 1, 9,
            1, 41, 3, 17, 37, 4, 3, 5, 7, 8, 3,
            2, 8, 2, 30, 2, 17, 39, 1, 1, 1, 1,
            2, 1, 3, 1, 5, 1, 8, 9, 1, 3, 2,
            30, 2, 17, 38, 3, 1, 2, 5, 7, 1, 9,
            1, 10, 2, 30, 2, 22, 48, 5, 1, 2, 6,
            7, 19, 2, 13, 46, 2, 1, 1, 1, 6, 1,
            12, 8, 50, 46, 2, 1, 1, 1, 9, 11, 6,
            14, 2, 58, 2, 27, 1, 1, 1, 1, 1, 4,
            2, 49, 14, 1, 4, 1, 1, 2, 5, 48, 9,
            1, 57, 33, 12, 4, 1, 6, 1, 2, 2, 2,
            1, 16, 2, 4, 2, 2, 4, 3, 1, 3, 2,
            7, 3, 4, 13, 1, 1, 1, 2, 6, 1, 1,
            14, 1, 98, 96, 72, 88, 349, 3, 931, 15, 2,
            1, 14, 15, 2, 1, 14, 15, 2, 15, 15, 14,
            35, 17, 2, 1, 7, 8, 1, 2, 9, 1, 1,
            9, 1, 45, 3, 155, 1, 87, 31, 3, 4, 2,
            9, 1, 6, 3, 20, 19, 29, 44, 9, 3, 2,
            1, 69, 23, 2, 3, 4, 45, 6, 2, 1, 1,
            1, 8, 1, 1, 1, 2, 8, 6, 13, 128, 4,
            1, 14, 33, 1, 1, 5, 1, 1, 5, 1, 1,
            1, 7, 31, 9, 12, 2, 1, 7, 23, 1, 4,
            2, 2, 2, 2, 2, 11, 3, 2, 36, 2, 1,
            1, 2, 3, 1, 1, 3, 2, 12, 36, 8, 8,
            2, 2, 21, 3, 128, 3, 1, 13, 1, 7, 4,
            1, 4, 2, 1, 203, 64, 523, 1, 2, 2, 24,
            7, 49, 16, 96, 33, 3070, 3, 141, 1, 96, 32,
            554, 6, 105, 2, 30164, 4, 1, 10, 33, 1, 80,
            2, 272, 1, 3, 1, 4, 1, 23, 2, 2, 1,
            24, 30, 4, 4, 3, 8, 1, 1, 13, 2, 16,
            34, 16, 1, 27, 18, 24, 24, 4, 8, 2, 23,
            11, 1, 1, 12, 32, 3, 1, 5, 3, 3, 36,
            1, 2, 4, 2, 1, 3, 1, 69, 35, 6, 2,
            2, 2, 2, 12, 1, 8, 1, 1, 18, 16, 1,
            3, 6, 1, 5, 48, 1, 1, 3, 2, 2, 5,
            2, 1, 1, 32, 9, 1, 2, 2, 5, 1, 1,
            201, 14, 2, 1, 1, 9, 8, 2, 1, 2, 1,
            2, 1, 1, 1, 18, 11184, 27, 49, 1028, 1024, 6942,
            1, 737, 16, 16, 7, 216, 1, 158, 2, 89, 3,
            513, 1, 2051, 15, 40, 7, 1, 1472, 1, 1, 1,
            53, 14, 1, 57, 2, 1, 45, 3, 4, 2, 1,
            1, 2, 1, 66, 3, 36, 5, 1, 6, 2, 75,
            2, 1, 48, 3, 9, 1, 1, 1258, 1, 1, 1,
            2, 6, 1, 1, 22681, 62, 4, 25042, 1, 1, 3,
            3, 1, 5, 8, 8, 2, 7, 30, 4, 148, 3,
            8097, 26, 790017, 255
          ],
          [
            1, 13, 1, 12, 1, 0, 1, 0, 1, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2,
            0, 2, 0, 1, 0, 2, 0, 2, 0, 2, 0, 2, 1, 0, 2, 0, 2, 0, 2, 0, 1,
            0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 4,
            0, 5, 2, 4, 2, 0, 4, 2, 4, 6, 4, 0, 2, 5, 0, 2, 0, 5, 2, 4, 0,
            5, 2, 0, 2, 4, 2, 4, 6, 0, 2, 5, 0, 2, 0, 5, 0, 2, 4, 0, 5, 2,
            4, 2, 6, 2, 5, 0, 2, 0, 2, 4, 0, 5, 2, 0, 4, 2, 4, 6, 0, 2, 0,
            2, 4, 0, 5, 2, 0, 2, 4, 2, 4, 6, 2, 5, 0, 2, 0, 5, 0, 2, 0, 5,
            2, 4, 2, 4, 6, 0, 2, 0, 4, 0, 5, 0, 2, 4, 2, 6, 2, 5, 0, 2, 0,
            4, 0, 5, 2, 0, 4, 2, 4, 2, 4, 2, 4, 2, 6, 2, 5, 0, 2, 0, 4, 0,
            5, 0, 2, 4, 2, 4, 6, 0, 2, 0, 2, 0, 4, 0, 5, 6, 2, 4, 2, 4, 2,
            4, 0, 5, 0, 2, 0, 4, 2, 6, 0, 2, 0, 5, 0, 2, 0, 4, 2, 0, 2, 0,
            5, 0, 2, 0, 2, 0, 2, 0, 2, 0, 4, 5, 2, 4, 2, 6, 0, 2, 0, 2, 0,
            2, 0, 5, 0, 2, 4, 2, 0, 6, 4, 2, 5, 0, 5, 0, 4, 2, 5, 2, 5, 0,
            5, 0, 5, 2, 5, 2, 0, 4, 2, 0, 2, 5, 0, 2, 0, 7, 8, 9, 0, 2, 0,
            5, 2, 6, 0, 5, 2, 6, 0, 5, 2, 0, 5, 2, 5, 0, 2, 4, 2, 4, 2, 4,
            2, 6, 2, 0, 2, 0, 2, 0, 2, 0, 5, 2, 4, 2, 4, 2, 4, 2, 0, 5, 0,
            5, 0, 4, 0, 4, 0, 5, 2, 4, 0, 5, 0, 5, 4, 2, 4, 2, 6, 0, 2, 0,
            2, 4, 2, 0, 2, 4, 0, 5, 2, 4, 2, 4, 2, 4, 2, 4, 6, 5, 0, 2, 0,
            2, 4, 0, 5, 4, 2, 4, 2, 6, 4, 5, 0, 5, 0, 5, 0, 2, 4, 2, 4, 2,
            4, 2, 6, 0, 5, 4, 2, 4, 2, 0, 5, 0, 2, 0, 2, 4, 2, 0, 2, 0, 4,
            2, 0, 2, 0, 1, 2, 1, 0, 1, 0, 1, 0, 2, 0, 2, 0, 6, 0, 2, 0, 2,
            0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 6, 5, 2, 5, 4, 2, 4, 0,
            5, 0, 5, 0, 5, 0, 5, 0, 4, 0, 5, 4, 6, 0, 2, 0, 5, 0, 2, 0, 5,
            2, 4, 6, 0, 7, 2, 4, 0, 5, 0, 5, 2, 4, 2, 4, 2, 4, 6, 0, 5, 2,
            4, 2, 4, 2, 0, 2, 0, 2, 4, 0, 5, 0, 5, 0, 5, 0, 5, 2, 0, 2, 0,
            2, 0, 2, 0, 2, 0, 5, 4, 2, 4, 0, 4, 6, 0, 5, 0, 5, 0, 5, 0, 4,
            2, 4, 2, 4, 0, 4, 6, 0, 11, 8, 9, 0, 2, 0, 2, 0, 2, 0, 2, 0, 1,
            0, 2, 0, 1, 0, 2, 0, 2, 0, 2, 6, 0, 4, 2, 4, 0, 2, 6, 0, 2, 4,
            0, 4, 2, 4, 6, 2, 0, 1, 0, 2, 0, 2, 4, 2, 6, 0, 2, 4, 0, 4, 2,
            4, 6, 0, 2, 4, 2, 4, 2, 6, 2, 0, 4, 2, 0, 2, 4, 2, 0, 4, 2, 1,
            2, 0, 2, 0, 2, 0, 2, 0, 14, 0, 1, 2
          ],
          true);
    }
    return /** @type {number} */ (
        goog.i18n.GraphemeBreak.inversions_.at(acode));
  }
};


/**
 * There are two kinds of grapheme clusters: 1) Legacy 2) Extended. This method
 * is to check for both using a boolean flag to switch between them.
 *
 * Extended clusters add two "do not break" rules on top of the legacy ones:
 * no break before a SPACING_MARK and no break after a PREPEND. Those two rules
 * rank below the rules that always break around CONTROL, CR and LF, so a
 * control character next to a spacing mark or prepend still yields a break.
 *
 * @param {number} a The code point value of the first character.
 * @param {number} b The code point value of the second character.
 * @param {boolean=} opt_extended If true, indicates extended grapheme cluster;
 *     If false, indicates legacy cluster.
 * @return {boolean} True if a & b do not form a cluster; False otherwise.
 */
goog.i18n.GraphemeBreak.hasGraphemeBreak = function(a, b, opt_extended) {

  var prop_a = goog.i18n.GraphemeBreak.getBreakProp_(a);
  var prop_b = goog.i18n.GraphemeBreak.getBreakProp_(b);
  var prop = goog.i18n.GraphemeBreak.property;

  // Anything the legacy rules keep together stays together in both modes.
  if (!goog.i18n.GraphemeBreak.applyLegacyBreakRules_(prop_a, prop_b)) {
    return false;
  }
  if (!opt_extended) {
    return true;
  }

  // The legacy rules reported a break. If that break is due to a CONTROL, CR
  // or LF on either side, the extended-only rules below must not undo it.
  // (CR followed by LF never reaches this point: it was handled above.)
  if (prop_a == prop.CONTROL || prop_a == prop.CR || prop_a == prop.LF ||
      prop_b == prop.CONTROL || prop_b == prop.CR || prop_b == prop.LF) {
    return true;
  }

  // Extended-only rules: Prepend x Any, Any x SpacingMark.
  return !(prop_a == prop.PREPEND || prop_b == prop.SPACING_MARK);
};