diff options
Diffstat (limited to 'node_modules/fbjs/lib/UnicodeCJK.js')
-rw-r--r-- | node_modules/fbjs/lib/UnicodeCJK.js | 174 |
1 files changed, 174 insertions, 0 deletions
/**
 * Copyright (c) 2013-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree. An additional grant
 * of patent rights can be found in the PATENTS file in the same directory.
 *
 * @typechecks
 */

/**
 * Unicode algorithms for CJK (Chinese, Japanese, Korean) writing systems.
 *
 * Utilities for Hanzi/Kanji/Hanja logographs and Kanas (Katakana and Hiragana)
 * syllables.
 *
 * For Korean Hangul see module `UnicodeHangulKorean`.
 */

'use strict';

/**
 * Latin
 *
 * NOTE: The code assumes these sets include only BMP characters.
 *
 * Every `R_*` constant below is a fragment meant to be placed inside a regular
 * expression character class (`[...]`), not a complete pattern.
 */
var R_LATIN_ASCII = 'a-zA-Z';
var R_LATIN_FULLWIDTH = '\uFF21-\uFF3A\uFF41-\uFF5A';
var R_LATIN = R_LATIN_ASCII + R_LATIN_FULLWIDTH;

/**
 * Hiragana & Katakana
 *
 * NOTE: Some ranges include non-BMP characters. We do not support those ranges
 * for now.
 */
var R_HIRAGANA = '\u3040-\u309F';
var R_KATAKANA = '\u30A0-\u30FF';
var R_KATAKANA_PHONETIC = '\u31F0-\u31FF';
var R_KATAKANA_HALFWIDTH = '\uFF65-\uFF9F';
// var R_KANA_SUPPLEMENT = '\U0001B000-\U0001B0FF';
var R_KATAKANA_ALL = R_KATAKANA + R_KATAKANA_PHONETIC + R_KATAKANA_HALFWIDTH;
var R_KANA = R_HIRAGANA + R_KATAKANA_ALL;

// Inclusive [first, last] code units of the Hiragana and Katakana blocks.
var I_HIRAGANA = [0x3040, 0x309F];
var I_KATAKANA = [0x30A0, 0x30FF];
// Distance between a Hiragana code unit and its Katakana counterpart.
var I_HIRAGANA_TO_KATAKANA = I_KATAKANA[0] - I_HIRAGANA[0];

// Inclusive sub-ranges of the Hiragana block whose characters have a Katakana
// counterpart exactly I_HIRAGANA_TO_KATAKANA code units higher. The rest of the
// block must NOT be shifted: U+3040 and U+3097-U+3098 are unassigned,
// U+3099-U+309C are sound marks shared by both scripts, and U+309F is a
// digraph whose shifted code unit (U+30FF) is a different digraph.
var I_HIRAGANA_LETTERS = [0x3041, 0x3096];
var I_HIRAGANA_ITERATION_MARKS = [0x309D, 0x309E];

/**
 * Hanzi/Kanji/Hanja
 *
 * NOTE: Some ranges include non-BMP characters. We do not support those ranges
 * for now.
 */
var R_IDEO_MAIN = '\u4E00-\u9FCF';
var R_IDEO_EXT_A = '\u3400-\u4DBF';
// var R_IDEO_EXT_B = '\U00020000-\U0002A6DF';
// var R_IDEO_EXT_C = '\U0002A700-\U0002B73F';
// var R_IDEO_EXT_D = '\U0002B740-\U0002B81F';
var R_IDEO = R_IDEO_MAIN + R_IDEO_EXT_A;

/**
 * Hangul
 */
// var R_HANGUL_JAMO = '\u1100-\u11FF';
// var R_HANGUL_JAMO_EXT_A = '\uA960-\uA97F';
// var R_HANGUL_JAMO_EXT_B = '\uD7B0-\uD7FF';
// var R_HANGUL_COMPATIBILITY = '\u3130-\u318F';
// var R_HANGUL_COMP_HALFWIDTH = '\uFFA0-\uFFDF';
var R_HANGUL_SYLLABLES = '\uAC00-\uD7AF';

/**
 * Globals
 *
 * The REGEX_* values are compiled lazily, on the first call of the function
 * that needs them, and then reused.
 */
var R_IDEO_OR_SYLL = R_IDEO + R_KANA + R_HANGUL_SYLLABLES;

var REGEX_IDEO = null;
var REGEX_KANA = null;
var REGEX_IDEO_OR_SYLL = null;
var REGEX_IS_KANA_WITH_TRAILING_LATIN = null;

/**
 * Whether the string includes any Katakana or Hiragana characters.
 *
 * @param {string} str
 * @return {boolean}
 */
function hasKana(str) {
  REGEX_KANA = REGEX_KANA || new RegExp('[' + R_KANA + ']');
  return REGEX_KANA.test(str);
}

/**
 * Whether the string includes any CJK Ideograph characters.
 *
 * @param {string} str
 * @return {boolean}
 */
function hasIdeograph(str) {
  REGEX_IDEO = REGEX_IDEO || new RegExp('[' + R_IDEO + ']');
  return REGEX_IDEO.test(str);
}

/**
 * Whether the string includes any CJK Ideograph or Syllable characters.
 *
 * @param {string} str
 * @return {boolean}
 */
function hasIdeoOrSyll(str) {
  REGEX_IDEO_OR_SYLL = REGEX_IDEO_OR_SYLL || new RegExp('[' + R_IDEO_OR_SYLL + ']');
  return REGEX_IDEO_OR_SYLL.test(str);
}

/**
 * Convert a single UTF-16 code unit to Katakana when it is a Hiragana letter
 * or a Hiragana iteration mark; any other code unit is returned unchanged.
 *
 * @param {string} chr A one-code-unit string; only index 0 is inspected.
 * @return {string}
 */
function charCodeToKatakana(chr) {
  var charCode = chr.charCodeAt(0);
  var isLetter =
    charCode >= I_HIRAGANA_LETTERS[0] && charCode <= I_HIRAGANA_LETTERS[1];
  var isIterationMark =
    charCode >= I_HIRAGANA_ITERATION_MARKS[0] &&
    charCode <= I_HIRAGANA_ITERATION_MARKS[1];
  return String.fromCharCode(
    isLetter || isIterationMark ? charCode + I_HIRAGANA_TO_KATAKANA : charCode
  );
}

/**
 * Replace any Hiragana character with the matching Katakana.
 *
 * Sound marks (U+3099-U+309C) and every non-Hiragana character are preserved
 * as they are.
 *
 * @param {string} str
 * @return {string}
 */
function hiraganaToKatakana(str) {
  // Cheap early exit: a string without any Kana has nothing to convert.
  if (!hasKana(str)) {
    return str;
  }
  // Splitting per code unit is safe for surrogate pairs: surrogates are never
  // converted, and joining puts each pair back together.
  return str.split('').map(charCodeToKatakana).join('');
}

/**
 * Whether the string is exactly a sequence of Kana characters followed by one
 * Latin character.
 *
 * @param {string} str
 * @return {boolean}
 */
function isKanaWithTrailingLatin(str) {
  REGEX_IS_KANA_WITH_TRAILING_LATIN =
    REGEX_IS_KANA_WITH_TRAILING_LATIN ||
    new RegExp('^' + '[' + R_KANA + ']+' + '[' + R_LATIN + ']' + '$');
  return REGEX_IS_KANA_WITH_TRAILING_LATIN.test(str);
}

/**
 * Drops the trailing Latin character from a string that is exactly a sequence
 * of Kana characters followed by one Latin character. Any other string is
 * returned unchanged.
 *
 * @param {string} str
 * @return {string}
 */
function kanaRemoveTrailingLatin(str) {
  if (isKanaWithTrailingLatin(str)) {
    // Latin characters accepted by R_LATIN are all single code units, so
    // dropping the last code unit drops exactly the trailing Latin character.
    return str.slice(0, -1);
  }
  return str;
}

var UnicodeCJK = {
  hasKana: hasKana,
  hasIdeograph: hasIdeograph,
  hasIdeoOrSyll: hasIdeoOrSyll,
  hiraganaToKatakana: hiraganaToKatakana,
  isKanaWithTrailingLatin: isKanaWithTrailingLatin,
  kanaRemoveTrailingLatin: kanaRemoveTrailingLatin
};

// Export through CommonJS when it is available; the guard lets the file be
// evaluated where no `module` binding exists (for example as an ES module).
if (typeof module !== 'undefined') {
  module.exports = UnicodeCJK;
}
\ No newline at end of file |