about | summary | refs | log | tree | commit | diff
path: root/node_modules/fbjs/lib/UnicodeCJK.js
diff options
context:
space:
mode:
Diffstat (limited to 'node_modules/fbjs/lib/UnicodeCJK.js')
-rw-r--r-- node_modules/fbjs/lib/UnicodeCJK.js 174
1 files changed, 174 insertions, 0 deletions
diff --git a/node_modules/fbjs/lib/UnicodeCJK.js b/node_modules/fbjs/lib/UnicodeCJK.js
new file mode 100644
index 000000000..762fa7ebe
--- /dev/null
+++ b/node_modules/fbjs/lib/UnicodeCJK.js
@@ -0,0 +1,174 @@
+/**
+ * Copyright (c) 2013-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ *
+ * @typechecks
+ */
+
+/**
+ * Unicode algorithms for CJK (Chinese, Japanese, Korean) writing systems.
+ *
+ * Utilities for Hanzi/Kanji/Hanja logographs and Kanas (Katakana and Hiragana)
+ * syllables.
+ *
+ * For Korean Hangul see module `UnicodeHangulKorean`.
+ */
+
+'use strict';
+
+/**
+ * Latin
+ *
+ * Range fragments for Latin letters, written in RegExp character-class syntax.
+ * They are plain strings so they can be concatenated and then wrapped in
+ * `[...]` when a RegExp is built.
+ *
+ * NOTE: The code assumes these sets include only BMP characters.
+ */
+
+var R_LATIN_ASCII = 'a-zA-Z';
+var R_LATIN_FULLWIDTH = '\uFF21-\uFF3A\uFF41-\uFF5A';
+var R_LATIN = R_LATIN_ASCII + R_LATIN_FULLWIDTH; // ASCII and fullwidth ranges combined
+
+/**
+ * Hiragana & Katakana
+ *
+ * The R_* values are range fragments in RegExp character-class syntax; the
+ * I_* values are numeric [first, last] code point pairs for the same blocks.
+ *
+ * NOTE: Some ranges include non-BMP characters. We do not support those ranges
+ * for now.
+ */
+var R_HIRAGANA = '\u3040-\u309F';
+var R_KATAKANA = '\u30A0-\u30FF';
+var R_KATAKANA_PHONETIC = '\u31F0-\u31FF';
+var R_KATAKANA_HALFWIDTH = '\uFF65-\uFF9F';
+// var R_KANA_SUPPLEMENT = '\U0001B000-\U0001B0FF';
+var R_KATAKANA_ALL = R_KATAKANA + R_KATAKANA_PHONETIC + R_KATAKANA_HALFWIDTH;
+var R_KANA = R_HIRAGANA + R_KATAKANA_ALL; // every supported Kana range
+
+var I_HIRAGANA = [0x3040, 0x309F]; // same bounds as R_HIRAGANA
+var I_KATAKANA = [0x30A0, 0x30FF]; // same bounds as R_KATAKANA
+var I_HIRAGANA_TO_KATAKANA = I_KATAKANA[0] - I_HIRAGANA[0]; // distance between the two block starts
+
+/**
+ * Hanzi/Kanji/Hanja
+ *
+ * Range fragments in RegExp character-class syntax. Only the two BMP ranges
+ * are active; the extension ranges below are left commented out.
+ *
+ * NOTE: Some ranges include non-BMP characters. We do not support those ranges
+ * for now.
+ *
+ * NOTE(review): R_IDEO_MAIN ends at U+9FCF, whereas the CJK Unified Ideographs
+ * block is understood to run to U+9FFF. Confirm whether the shorter range is
+ * intentional before relying on it for recently added ideographs.
+ */
+var R_IDEO_MAIN = '\u4E00-\u9FCF';
+var R_IDEO_EXT_A = '\u3400-\u4DBF';
+// var R_IDEO_EXT_B = '\U00020000-\U0002A6DF';
+// var R_IDEO_EXT_C = '\U0002A700-\U0002B73F';
+// var R_IDEO_EXT_D = '\U0002B740-\U0002B81F';
+var R_IDEO = R_IDEO_MAIN + R_IDEO_EXT_A; // every supported ideograph range
+
+/**
+ * Hangul
+ *
+ * Only the syllables range is defined as a live value; the Jamo and
+ * compatibility ranges are kept below as commented-out references.
+ */
+// var R_HANGUL_JAMO = '\u1100-\u11FF';
+// var R_HANGUL_JAMO_EXT_A = '\uA960-\uA97F';
+// var R_HANGUL_JAMO_EXT_B = '\uD7B0-\uD7FF';
+// var R_HANGUL_COMPATIBILITY = '\u3130-\u318F';
+// var R_HANGUL_COMP_HALFWIDTH = '\uFFA0-\uFFDF';
+var R_HANGUL_SYLLABLES = '\uAC00-\uD7AF';
+
+/**
+ * Globals
+ *
+ * R_IDEO_OR_SYLL joins the ideograph, Kana and Hangul-syllable range
+ * fragments into one character-class fragment.
+ *
+ * The REGEX_* variables are caches. Each starts as null and is assigned a
+ * RegExp the first time the function that uses it runs; later calls reuse
+ * that RegExp.
+ */
+var R_IDEO_OR_SYLL = R_IDEO + R_KANA + R_HANGUL_SYLLABLES;
+
+var REGEX_IDEO = null;
+var REGEX_KANA = null;
+var REGEX_IDEO_OR_SYLL = null;
+var REGEX_IS_KANA_WITH_TRAILING_LATIN = null;
+
+/**
+ * Whether the string includes any Katakana or Hiragana characters.
+ *
+ * @param {string} str
+ * @return {boolean}
+ */
+function hasKana(str) {
+ REGEX_KANA = REGEX_KANA || new RegExp('[' + R_KANA + ']');
+ return REGEX_KANA.test(str);
+}
+
+/**
+ * Whether the string includes any CJK Ideograph characters.
+ *
+ * @param {string} str
+ * @return {boolean}
+ */
+function hasIdeograph(str) {
+ REGEX_IDEO = REGEX_IDEO || new RegExp('[' + R_IDEO + ']');
+ return REGEX_IDEO.test(str);
+}
+
+/**
+ * Whether the string includes any CJK Ideograph or Syllable characters.
+ *
+ * @param {string} str
+ * @return {boolean}
+ */
+function hasIdeoOrSyll(str) {
+ REGEX_IDEO_OR_SYLL = REGEX_IDEO_OR_SYLL || new RegExp('[' + R_IDEO_OR_SYLL + ']');
+ return REGEX_IDEO_OR_SYLL.test(str);
+}
+
+/**
+ * @param {string} chr
+ * @output {string}
+ */
+function charCodeToKatakana(chr) {
+ var charCode = chr.charCodeAt(0);
+ return String.fromCharCode(charCode < I_HIRAGANA[0] || charCode > I_HIRAGANA[1] ? charCode : charCode + I_HIRAGANA_TO_KATAKANA);
+}
+
+/**
+ * Replace any Hiragana character with the matching Katakana
+ *
+ * @param {string} str
+ * @output {string}
+ */
+function hiraganaToKatakana(str) {
+ if (!hasKana(str)) {
+ return str;
+ }
+ return str.split('').map(charCodeToKatakana).join('');
+}
+
+/**
+ * Whether the string is exactly a sequence of Kana characters followed by one
+ * Latin character.
+ *
+ * @param {string} str
+ * @output {string}
+ */
+function isKanaWithTrailingLatin(str) {
+ REGEX_IS_KANA_WITH_TRAILING_LATIN = REGEX_IS_KANA_WITH_TRAILING_LATIN || new RegExp('^' + '[' + R_KANA + ']+' + '[' + R_LATIN + ']' + '$');
+ return REGEX_IS_KANA_WITH_TRAILING_LATIN.test(str);
+}
+
+/**
+ * Drops the trailing Latin character from a string that is exactly a sequence
+ * of Kana characters followed by one Latin character.
+ *
+ * @param {string} str
+ * @output {string}
+ */
+function kanaRemoveTrailingLatin(str) {
+ if (isKanaWithTrailingLatin(str)) {
+ return str.substr(0, str.length - 1);
+ }
+ return str;
+}
+
+var UnicodeCJK = {
+ hasKana: hasKana,
+ hasIdeograph: hasIdeograph,
+ hasIdeoOrSyll: hasIdeoOrSyll,
+ hiraganaToKatakana: hiraganaToKatakana,
+ isKanaWithTrailingLatin: isKanaWithTrailingLatin,
+ kanaRemoveTrailingLatin: kanaRemoveTrailingLatin
+};
+
+module.exports = UnicodeCJK;
\ No newline at end of file