215 lines
9.3 KiB
JavaScript
215 lines
9.3 KiB
JavaScript
// Copyright 2006 The Closure Library Authors. All Rights Reserved.
|
||
//
|
||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||
// you may not use this file except in compliance with the License.
|
||
// You may obtain a copy of the License at
|
||
//
|
||
// http://www.apache.org/licenses/LICENSE-2.0
|
||
//
|
||
// Unless required by applicable law or agreed to in writing, software
|
||
// distributed under the License is distributed on an "AS-IS" BASIS,
|
||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
// See the License for the specific language governing permissions and
|
||
// limitations under the License.
|
||
|
||
/**
|
||
* @fileoverview Detect Grapheme Cluster Break in a pair of codepoints. Follows
|
||
* Unicode 5.1 UAX#29. Tailoring for Virama × Indic Consonants is used.
|
||
*
|
||
*/
|
||
|
||
goog.provide('goog.i18n.GraphemeBreak');
|
||
goog.require('goog.structs.InversionMap');
|
||
|
||
|
||
/**
 * Grapheme_Cluster_Break property values used by the break rules in this file.
 *
 * The names correspond directly to the Grapheme_Cluster_Break property values
 * listed in table 2 of http://unicode.org/reports/tr29. VIRAMA and
 * INDIC_CONSONANT are additions that exist only to support the
 * Virama × Base tailoring described in the notes of that report.
 *
 * CR and LF each occur only once, so they are good candidates for the
 * two-digit values and are placed after the more frequent properties, which
 * keep the single-digit values.
 *
 * @enum {number}
 * @protected
 */
goog.i18n.GraphemeBreak.property = {
  // Single-digit values.
  ANY: 0,
  CONTROL: 1,
  EXTEND: 2,
  PREPEND: 3,
  SPACING_MARK: 4,
  INDIC_CONSONANT: 5,
  VIRAMA: 6,
  L: 7,
  V: 8,
  T: 9,

  // Two-digit values.
  LV: 10,
  LVT: 11,
  CR: 12,
  LF: 13,
  REGIONAL_INDICATOR: 14
};
|
||
|
||
|
||
/**
 * Cache for the inversion map that holds the Grapheme Cluster Break property
 * value of every code point. It starts out as null and is only populated the
 * first time a property lookup actually needs it.
 *
 * @type {goog.structs.InversionMap}
 * @private
 */
goog.i18n.GraphemeBreak.inversions_ = null;
|
||
|
||
|
||
/**
 * There are two kinds of grapheme clusters: 1) Legacy 2) Extended. This method
 * applies the legacy rules to a pair of adjacent characters, given only their
 * Grapheme Cluster Break property values.
 *
 * The checks are ordered: the mandatory breaks around CONTROL, CR and LF are
 * evaluated before any of the "do not break" rules, so they always win.
 *
 * @param {number} prop_a The property enum value of the first character.
 * @param {number} prop_b The property enum value of the second character.
 * @return {boolean} True if a & b do not form a cluster; False otherwise.
 * @private
 */
goog.i18n.GraphemeBreak.applyLegacyBreakRules_ = function(prop_a, prop_b) {
  var prop = goog.i18n.GraphemeBreak.property;

  // CR × LF: a carriage return / line feed pair is never split.
  if (prop_a == prop.CR && prop_b == prop.LF) {
    return false;
  }
  // Otherwise always break after a control character, CR or LF ...
  if (prop_a == prop.CONTROL || prop_a == prop.CR || prop_a == prop.LF) {
    return true;
  }
  // ... and always break before one.
  if (prop_b == prop.CONTROL || prop_b == prop.CR || prop_b == prop.LF) {
    return true;
  }

  // Hangul syllable sequences: L × (L | V | LV | LVT).
  if ((prop_a == prop.L) &&
      (prop_b == prop.L || prop_b == prop.V ||
       prop_b == prop.LV || prop_b == prop.LVT)) {
    return false;
  }
  // (LV | V) × (V | T).
  if ((prop_a == prop.LV || prop_a == prop.V) &&
      (prop_b == prop.V || prop_b == prop.T)) {
    return false;
  }
  // (LVT | T) × T.
  if ((prop_a == prop.LVT || prop_a == prop.T) && (prop_b == prop.T)) {
    return false;
  }

  // Never break before an extending character. VIRAMA is treated the same
  // way as EXTEND when it is the second character.
  if (prop_b == prop.EXTEND || prop_b == prop.VIRAMA) {
    return false;
  }
  // Tailoring: Virama × Indic consonant.
  if (prop_a == prop.VIRAMA && prop_b == prop.INDIC_CONSONANT) {
    return false;
  }

  // Regional_Indicator × Regional_Indicator: the two code points that make up
  // a flag must stay in one cluster. Only a pair of properties is visible
  // here, so a longer run of regional indicators is also reported as
  // unbroken.
  if (prop_a == prop.REGIONAL_INDICATOR &&
      prop_b == prop.REGIONAL_INDICATOR) {
    return false;
  }

  // Any ÷ Any.
  return true;
};
|
||
|
||
|
||
/**
 * Looks up the Grapheme Cluster Break property of a code point.
 *
 * Code points in the range 0xAC00..0xD7A3 are classified arithmetically as
 * Hangul LV or LVT; every other code point is looked up in the inversion map,
 * which is built on first use and then cached in
 * goog.i18n.GraphemeBreak.inversions_.
 *
 * @param {number} acode The code point value of the character.
 * @return {number} Property enum value of codepoint.
 * @private
 */
goog.i18n.GraphemeBreak.getBreakProp_ = function(acode) {
  if (acode >= 0xAC00 && acode <= 0xD7A3) {
    var hangulProps = goog.i18n.GraphemeBreak.property;
    // 0xAC00 % 0x1C is 0x10, so this remainder test selects exactly the code
    // points whose offset from 0xAC00 is a multiple of 28; those are LV, all
    // others in the range are LVT.
    return (acode % 0x1C == 0x10) ? hangulProps.LV : hangulProps.LVT;
  }

  var cachedMap = goog.i18n.GraphemeBreak.inversions_;
  if (!cachedMap) {
    // First array handed to the InversionMap: range boundaries.
    var rangeDeltas = [
      0, 10, 1, 2, 1, 18, 95, 33, 13, 1, 594, 112, 275, 7, 263, 45, 1, 1,
      1, 2, 1, 2, 1, 1, 56, 5, 11, 11, 48, 21, 16, 1, 101, 7, 1, 1, 6, 2,
      2, 1, 4, 33, 1, 1, 1, 30, 27, 91, 11, 58, 9, 34, 4, 1, 9, 1, 3, 1,
      5, 43, 3, 136, 31, 1, 17, 37, 1, 1, 1, 1, 3, 8, 4, 1, 2, 1, 7, 8, 2,
      2, 21, 8, 1, 2, 17, 39, 1, 1, 1, 2, 6, 6, 1, 9, 5, 4, 2, 2, 12, 2,
      15, 2, 1, 17, 39, 2, 3, 12, 4, 8, 6, 17, 2, 3, 14, 1, 17, 39, 1, 1,
      3, 8, 4, 1, 20, 2, 29, 1, 2, 17, 39, 1, 1, 2, 1, 6, 6, 9, 6, 4, 2,
      2, 13, 1, 16, 1, 18, 41, 1, 1, 1, 12, 1, 9, 1, 41, 3, 17, 37, 4, 3,
      5, 7, 8, 3, 2, 8, 2, 30, 2, 17, 39, 1, 1, 1, 1, 2, 1, 3, 1, 5, 1, 8,
      9, 1, 3, 2, 30, 2, 17, 38, 3, 1, 2, 5, 7, 1, 9, 1, 10, 2, 30, 2, 22,
      48, 5, 1, 2, 6, 7, 19, 2, 13, 46, 2, 1, 1, 1, 6, 1, 12, 8, 50, 46,
      2, 1, 1, 1, 9, 11, 6, 14, 2, 58, 2, 27, 1, 1, 1, 1, 1, 4, 2, 49, 14,
      1, 4, 1, 1, 2, 5, 48, 9, 1, 57, 33, 12, 4, 1, 6, 1, 2, 2, 2, 1, 16,
      2, 4, 2, 2, 4, 3, 1, 3, 2, 7, 3, 4, 13, 1, 1, 1, 2, 6, 1, 1, 14, 1,
      98, 96, 72, 88, 349, 3, 931, 15, 2, 1, 14, 15, 2, 1, 14, 15, 2, 15,
      15, 14, 35, 17, 2, 1, 7, 8, 1, 2, 9, 1, 1, 9, 1, 45, 3, 155, 1, 87,
      31, 3, 4, 2, 9, 1, 6, 3, 20, 19, 29, 44, 9, 3, 2, 1, 69, 23, 2, 3,
      4, 45, 6, 2, 1, 1, 1, 8, 1, 1, 1, 2, 8, 6, 13, 128, 4, 1, 14, 33, 1,
      1, 5, 1, 1, 5, 1, 1, 1, 7, 31, 9, 12, 2, 1, 7, 23, 1, 4, 2, 2, 2, 2,
      2, 11, 3, 2, 36, 2, 1, 1, 2, 3, 1, 1, 3, 2, 12, 36, 8, 8, 2, 2, 21,
      3, 128, 3, 1, 13, 1, 7, 4, 1, 4, 2, 1, 203, 64, 523, 1, 2, 2, 24, 7,
      49, 16, 96, 33, 3070, 3, 141, 1, 96, 32, 554, 6, 105, 2, 30164, 4,
      1, 10, 33, 1, 80, 2, 272, 1, 3, 1, 4, 1, 23, 2, 2, 1, 24, 30, 4, 4,
      3, 8, 1, 1, 13, 2, 16, 34, 16, 1, 27, 18, 24, 24, 4, 8, 2, 23, 11,
      1, 1, 12, 32, 3, 1, 5, 3, 3, 36, 1, 2, 4, 2, 1, 3, 1, 69, 35, 6, 2,
      2, 2, 2, 12, 1, 8, 1, 1, 18, 16, 1, 3, 6, 1, 5, 48, 1, 1, 3, 2, 2,
      5, 2, 1, 1, 32, 9, 1, 2, 2, 5, 1, 1, 201, 14, 2, 1, 1, 9, 8, 2, 1,
      2, 1, 2, 1, 1, 1, 18, 11184, 27, 49, 1028, 1024, 6942, 1, 737, 16,
      16, 7, 216, 1, 158, 2, 89, 3, 513, 1, 2051, 15, 40, 7, 1, 1472, 1,
      1, 1, 53, 14, 1, 57, 2, 1, 45, 3, 4, 2, 1, 1, 2, 1, 66, 3, 36, 5, 1,
      6, 2, 75, 2, 1, 48, 3, 9, 1, 1, 1258, 1, 1, 1, 2, 6, 1, 1, 22681,
      62, 4, 25042, 1, 1, 3, 3, 1, 5, 8, 8, 2, 7, 30, 4, 148, 3, 8097, 26,
      790017, 255];
    // Second array: the property enum value for each range above.
    var propValues = [
      1, 13, 1, 12, 1, 0, 1, 0, 1, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0,
      2, 0, 1, 0, 2, 0, 2, 0, 2, 0, 2, 1, 0, 2, 0, 2, 0, 2, 0, 1, 0, 2, 0,
      2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 4, 0, 5, 2, 4, 2,
      0, 4, 2, 4, 6, 4, 0, 2, 5, 0, 2, 0, 5, 2, 4, 0, 5, 2, 0, 2, 4, 2, 4,
      6, 0, 2, 5, 0, 2, 0, 5, 0, 2, 4, 0, 5, 2, 4, 2, 6, 2, 5, 0, 2, 0, 2,
      4, 0, 5, 2, 0, 4, 2, 4, 6, 0, 2, 0, 2, 4, 0, 5, 2, 0, 2, 4, 2, 4, 6,
      2, 5, 0, 2, 0, 5, 0, 2, 0, 5, 2, 4, 2, 4, 6, 0, 2, 0, 4, 0, 5, 0, 2,
      4, 2, 6, 2, 5, 0, 2, 0, 4, 0, 5, 2, 0, 4, 2, 4, 2, 4, 2, 4, 2, 6, 2,
      5, 0, 2, 0, 4, 0, 5, 0, 2, 4, 2, 4, 6, 0, 2, 0, 2, 0, 4, 0, 5, 6, 2,
      4, 2, 4, 2, 4, 0, 5, 0, 2, 0, 4, 2, 6, 0, 2, 0, 5, 0, 2, 0, 4, 2, 0,
      2, 0, 5, 0, 2, 0, 2, 0, 2, 0, 2, 0, 4, 5, 2, 4, 2, 6, 0, 2, 0, 2, 0,
      2, 0, 5, 0, 2, 4, 2, 0, 6, 4, 2, 5, 0, 5, 0, 4, 2, 5, 2, 5, 0, 5, 0,
      5, 2, 5, 2, 0, 4, 2, 0, 2, 5, 0, 2, 0, 7, 8, 9, 0, 2, 0, 5, 2, 6, 0,
      5, 2, 6, 0, 5, 2, 0, 5, 2, 5, 0, 2, 4, 2, 4, 2, 4, 2, 6, 2, 0, 2, 0,
      2, 0, 2, 0, 5, 2, 4, 2, 4, 2, 4, 2, 0, 5, 0, 5, 0, 4, 0, 4, 0, 5, 2,
      4, 0, 5, 0, 5, 4, 2, 4, 2, 6, 0, 2, 0, 2, 4, 2, 0, 2, 4, 0, 5, 2, 4,
      2, 4, 2, 4, 2, 4, 6, 5, 0, 2, 0, 2, 4, 0, 5, 4, 2, 4, 2, 6, 4, 5, 0,
      5, 0, 5, 0, 2, 4, 2, 4, 2, 4, 2, 6, 0, 5, 4, 2, 4, 2, 0, 5, 0, 2, 0,
      2, 4, 2, 0, 2, 0, 4, 2, 0, 2, 0, 1, 2, 1, 0, 1, 0, 1, 0, 2, 0, 2, 0,
      6, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 6, 5, 2, 5, 4,
      2, 4, 0, 5, 0, 5, 0, 5, 0, 5, 0, 4, 0, 5, 4, 6, 0, 2, 0, 5, 0, 2, 0,
      5, 2, 4, 6, 0, 7, 2, 4, 0, 5, 0, 5, 2, 4, 2, 4, 2, 4, 6, 0, 5, 2, 4,
      2, 4, 2, 0, 2, 0, 2, 4, 0, 5, 0, 5, 0, 5, 0, 5, 2, 0, 2, 0, 2, 0, 2,
      0, 2, 0, 5, 4, 2, 4, 0, 4, 6, 0, 5, 0, 5, 0, 5, 0, 4, 2, 4, 2, 4, 0,
      4, 6, 0, 11, 8, 9, 0, 2, 0, 2, 0, 2, 0, 2, 0, 1, 0, 2, 0, 1, 0, 2,
      0, 2, 0, 2, 6, 0, 4, 2, 4, 0, 2, 6, 0, 2, 4, 0, 4, 2, 4, 6, 2, 0, 1,
      0, 2, 0, 2, 4, 2, 6, 0, 2, 4, 0, 4, 2, 4, 6, 0, 2, 4, 2, 4, 2, 6, 2,
      0, 4, 2, 0, 2, 4, 2, 0, 4, 2, 1, 2, 0, 2, 0, 2, 0, 2, 0, 14, 0, 1,
      2];
    // NOTE(review): the trailing `true` is InversionMap's delta flag, i.e.
    // rangeDeltas holds differences between boundaries rather than absolute
    // code points - confirm against goog.structs.InversionMap.
    cachedMap = new goog.structs.InversionMap(rangeDeltas, propValues, true);
    goog.i18n.GraphemeBreak.inversions_ = cachedMap;
  }
  return /** @type {number} */ (cachedMap.at(acode));
};
|
||
|
||
|
||
/**
 * There are two kinds of grapheme clusters: 1) Legacy 2) Extended. This method
 * checks for a cluster boundary between two adjacent code points under either
 * rule set, using a boolean flag to switch between them.
 *
 * The extended rules only ever remove breaks that the legacy rules report:
 * no break after PREPEND and no break before SPACING_MARK. They never
 * override the mandatory breaks around CONTROL, CR and LF, which UAX #29
 * orders ahead of the Prepend / SpacingMark rules.
 *
 * @param {number} a The code point value of the first character.
 * @param {number} b The code point value of the second character.
 * @param {boolean=} opt_extended If true, indicates extended grapheme cluster;
 *     If false, indicates legacy cluster.
 * @return {boolean} True if a & b do not form a cluster; False otherwise.
 */
goog.i18n.GraphemeBreak.hasGraphemeBreak = function(a, b, opt_extended) {
  var prop_a = goog.i18n.GraphemeBreak.getBreakProp_(a);
  var prop_b = goog.i18n.GraphemeBreak.getBreakProp_(b);
  var prop = goog.i18n.GraphemeBreak.property;

  // A pair that the legacy rules keep together stays together in both modes.
  if (!goog.i18n.GraphemeBreak.applyLegacyBreakRules_(prop_a, prop_b)) {
    return false;
  }
  if (!opt_extended) {
    return true;
  }

  // The legacy rules reported a break. If either side is CONTROL, CR or LF
  // the break is mandatory (CR followed by LF was already handled above), so
  // the extended exceptions below must not cancel it.
  if (prop_a == prop.CONTROL || prop_a == prop.CR || prop_a == prop.LF ||
      prop_b == prop.CONTROL || prop_b == prop.CR || prop_b == prop.LF) {
    return true;
  }

  // Extended clusters: Prepend × and × SpacingMark.
  return !(prop_a == prop.PREPEND || prop_b == prop.SPACING_MARK);
};
|