aboutsummaryrefslogtreecommitdiff
path: root/node_modules/fbjs/lib/UnicodeHangulKorean.js
blob: 67252699d2bab91ecc0bf8751fc5874fc58d80ec (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
/**
 * Copyright (c) 2013-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree. An additional grant
 * of patent rights can be found in the PATENTS file in the same directory.
 *
 * @typechecks
 */

/**
 * Unicode algorithms for Hangul script, the Korean writing system
 *
 * Hangul script has three encoded models in Unicode:
 *
 * A) Conjoining Jamo (covers modern and historic elements)
 *    * U+1100..U+11FF ; Hangul Jamo
 *    * U+A960..U+A97F ; Hangul Jamo Extended-A
 *    * U+D7B0..U+D7FF ; Hangul Jamo Extended-B
 *
 * B) Conjoined Syllables (only covers modern Korean language)
 *    * U+AC00..U+D7AF ; Hangul Syllables
 *
 * C) Compatibility Jamo (one code-point for each "shape")
 *    * U+3130..U+318F ; Hangul Compatibility Jamo
 *
 * This modules helps you convert characters from one model to another.
 * Primary functionalities are:
 *
 * 1) Convert from any encodings to Conjoining Jamo characters (A),
 *    e.g. for prefix matching
 *
 * 2) Convert from any encodings to Syllable characters, when possible (B),
 *    e.g. to reach the normal Unicode form (NFC)
 */

'use strict';

var HANGUL_COMPATIBILITY_OR_SYLLABLE_REGEX = /[\u3130-\u318F\uAC00-\uD7AF]/;

/**
 * Returns true if the input includes any Hangul Compatibility Jamo or
 * Hangul Conjoined Syllable.
 *
 * @param {string} str
 */
function hasCompatibilityOrSyllable(str) {
  return HANGUL_COMPATIBILITY_OR_SYLLABLE_REGEX.test(str);
}

/* Compatibility Jamo -> Conjoining Jamo
 *
 * Maps a compatibility character to the Conjoining Jamo character,
 * positioned at (compatibilityCodePoint - 0x3131).
 *
 * Generated by:
 * $ grep '^31[3-8].;' UnicodeData.txt |\
 *     awk -F';' '{print $6}' | awk '{print "  0x"$2","}'
 */
var CMAP = [0x1100, 0x1101, 0x11AA, 0x1102, 0x11AC, 0x11AD, 0x1103, 0x1104, 0x1105, 0x11B0, 0x11B1, 0x11B2, 0x11B3, 0x11B4, 0x11B5, 0x111A, 0x1106, 0x1107, 0x1108, 0x1121, 0x1109, 0x110A, 0x110B, 0x110C, 0x110D, 0x110E, 0x110F, 0x1110, 0x1111, 0x1112, 0x1161, 0x1162, 0x1163, 0x1164, 0x1165, 0x1166, 0x1167, 0x1168, 0x1169, 0x116A, 0x116B, 0x116C, 0x116D, 0x116E, 0x116F, 0x1170, 0x1171, 0x1172, 0x1173, 0x1174, 0x1175, 0x1160, 0x1114, 0x1115, 0x11C7, 0x11C8, 0x11CC, 0x11CE, 0x11D3, 0x11D7, 0x11D9, 0x111C, 0x11DD, 0x11DF, 0x111D, 0x111E, 0x1120, 0x1122, 0x1123, 0x1127, 0x1129, 0x112B, 0x112C, 0x112D, 0x112E, 0x112F, 0x1132, 0x1136, 0x1140, 0x1147, 0x114C, 0x11F1, 0x11F2, 0x1157, 0x1158, 0x1159, 0x1184, 0x1185, 0x1188, 0x1191, 0x1192, 0x1194, 0x119E, 0x11A1];

var CBASE = 0x3131;
var CCOUNT = CMAP.length;
var CTOP = CBASE + CCOUNT;

/**
 * Maps one Hangul Compatibility Jamo code-point to the equivalent Hangul
 * Conjoining Jamo characters, as defined in UnicodeData.txt.
 *
 * @param {number} codePoint  One Unicode code-point
 * @output {string}
 */
function fromCompatibility(codePoint) {
  return String.fromCharCode(CMAP[codePoint - CBASE]);
}

/**
 * Conjoined Syllable -> Conjoining Jamo
 *
 * Based on the "Hangul Syllable Decomposition" algorithm provided in
 * 3.12 Conjoining Jamo Behavior, The Unicode Standard, Version 6.3.0.
 * <http://www.unicode.org/versions/Unicode6.2.0/ch03.pdf>
 */

var LBASE = 0x1100;
var VBASE = 0x1161;
var TBASE = 0x11A7;
var SBASE = 0xAC00;
var LCOUNT = 19;
var VCOUNT = 21;
var TCOUNT = 28;
var NCOUNT = VCOUNT * TCOUNT;
var SCOUNT = LCOUNT * NCOUNT;
var STOP = SBASE + SCOUNT;

/**
 * Maps one Hangul Syllable code-point to the equivalent Hangul
 * Conjoining Jamo characters, as defined in UnicodeData.txt.
 *
 * @param {number} codePoint  One Unicode character
 * @output {string}
 */
function decomposeSyllable(codePoint) {
  var sylSIndex = codePoint - SBASE;
  var sylTIndex = sylSIndex % TCOUNT;
  return String.fromCharCode(LBASE + sylSIndex / NCOUNT) + String.fromCharCode(VBASE + sylSIndex % NCOUNT / TCOUNT) + (sylTIndex > 0 ? String.fromCharCode(TBASE + sylTIndex) : '');
}

/* To Conjoining Jamo */

/**
 * Return Unicode characters as they are, except for Hangul characters, which
 * will be converted to the Conjoining Jamo form.
 *
 * @param {string} string
 * @output {string}
 */
function toConjoiningJamo(string) {
  if (!hasCompatibilityOrSyllable(string)) {
    return string;
  }

  var result = [];
  for (var i = 0; i < string.length; i++) {
    var charStr = string.charAt(i);
    var codeUnit = charStr.charCodeAt(0);
    result.push(CBASE <= codeUnit && codeUnit < CTOP ? fromCompatibility(codeUnit) : SBASE <= codeUnit && codeUnit < STOP ? decomposeSyllable(codeUnit) : charStr);
  }
  return result.join('');
}

var UnicodeHangulKorean = {
  toConjoiningJamo: toConjoiningJamo
};

module.exports = UnicodeHangulKorean;