wallet-core/node_modules/fbjs/lib/UnicodeUtils.js

/**
 * Copyright (c) 2013-present, Facebook, Inc.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 *
 * @typechecks
 */

/**
 * Unicode-enabled replacesments for basic String functions.
 *
 * All the functions in this module assume that the input string is a valid
 * UTF-16 encoding of a Unicode sequence. If it's not the case, the behavior
 * will be undefined.
 *
 * WARNING: Since this module is typechecks-enforced, you may find new bugs
 * when replacing normal String functions with ones provided here.
 */

'use strict';

var invariant = require('./invariant');

// These two ranges are consecutive so anything in [HIGH_START, LOW_END] is a
// surrogate code unit.
var SURROGATE_HIGH_START = 0xD800;
var SURROGATE_HIGH_END = 0xDBFF;
var SURROGATE_LOW_START = 0xDC00;
var SURROGATE_LOW_END = 0xDFFF;
var SURROGATE_UNITS_REGEX = /[\uD800-\uDFFF]/;

/**
 * @param {number} codeUnit   A Unicode code-unit, in range [0, 0x10FFFF]
 * @return {boolean}          Whether code-unit is in a surrogate (hi/low) range
 */
function isCodeUnitInSurrogateRange(codeUnit) {
  return SURROGATE_HIGH_START <= codeUnit && codeUnit <= SURROGATE_LOW_END;
}

/**
 * Returns whether the two characters starting at `index` form a surrogate pair.
 * For example, given the string s = "\uD83D\uDE0A", (s, 0) returns true and
 * (s, 1) returns false.
 *
 * @param {string} str
 * @param {number} index
 * @return {boolean}
 */
function isSurrogatePair(str, index) {
  !(0 <= index && index < str.length) ? process.env.NODE_ENV !== 'production' ? invariant(false, 'isSurrogatePair: Invalid index %s for string length %s.', index, str.length) : invariant(false) : void 0;
  if (index + 1 === str.length) {
    return false;
  }
  var first = str.charCodeAt(index);
  var second = str.charCodeAt(index + 1);
  return SURROGATE_HIGH_START <= first && first <= SURROGATE_HIGH_END && SURROGATE_LOW_START <= second && second <= SURROGATE_LOW_END;
}

/**
 * @param {string} str  Non-empty string
 * @return {boolean}    True if the input includes any surrogate code units
 */
function hasSurrogateUnit(str) {
  return SURROGATE_UNITS_REGEX.test(str);
}

/**
 * Return the length of the original Unicode character at given position in the
 * String by looking into the UTF-16 code unit; that is equal to 1 for any
 * non-surrogate characters in BMP ([U+0000..U+D7FF] and [U+E000, U+FFFF]); and
 * returns 2 for the hi/low surrogates ([U+D800..U+DFFF]), which are in fact
 * representing non-BMP characters ([U+10000..U+10FFFF]).
 *
 * Examples:
 * - '\u0020' => 1
 * - '\u3020' => 1
 * - '\uD835' => 2
 * - '\uD835\uDDEF' => 2
 * - '\uDDEF' => 2
 *
 * @param {string} str  Non-empty string
 * @param {number} pos  Position in the string to look for one code unit
 * @return {number}      Number 1 or 2
 */
function getUTF16Length(str, pos) {
  return 1 + isCodeUnitInSurrogateRange(str.charCodeAt(pos));
}

/**
 * Fully Unicode-enabled replacement for String#length
 *
 * @param {string} str  Valid Unicode string
 * @return {number}     The number of Unicode characters in the string
 */
function strlen(str) {
  // Call the native functions if there's no surrogate char
  if (!hasSurrogateUnit(str)) {
    return str.length;
  }

  var len = 0;
  for (var pos = 0; pos < str.length; pos += getUTF16Length(str, pos)) {
    len++;
  }
  return len;
}

/**
 * Fully Unicode-enabled replacement for String#substr()
 *
 * @param {string} str      Valid Unicode string
 * @param {number} start    Location in Unicode sequence to begin extracting
 * @param {?number} length  The number of Unicode characters to extract
 *                          (default: to the end of the string)
 * @return {string}         Extracted sub-string
 */
function substr(str, start, length) {
  start = start || 0;
  length = length === undefined ? Infinity : length || 0;

  // Call the native functions if there's no surrogate char
  if (!hasSurrogateUnit(str)) {
    return str.substr(start, length);
  }

  // Obvious cases
  var size = str.length;
  if (size <= 0 || start > size || length <= 0) {
    return '';
  }

  // Find the actual starting position
  var posA = 0;
  if (start > 0) {
    for (; start > 0 && posA < size; start--) {
      posA += getUTF16Length(str, posA);
    }
    if (posA >= size) {
      return '';
    }
  } else if (start < 0) {
    for (posA = size; start < 0 && 0 < posA; start++) {
      posA -= getUTF16Length(str, posA - 1);
    }
    if (posA < 0) {
      posA = 0;
    }
  }

  // Find the actual ending position
  var posB = size;
  if (length < size) {
    for (posB = posA; length > 0 && posB < size; length--) {
      posB += getUTF16Length(str, posB);
    }
  }

  return str.substring(posA, posB);
}

/**
 * Fully Unicode-enabled replacement for String#substring()
 *
 * @param {string} str    Valid Unicode string
 * @param {number} start  Location in Unicode sequence to begin extracting
 * @param {?number} end   Location in Unicode sequence to end extracting
 *                        (default: end of the string)
 * @return {string}       Extracted sub-string
 */
function substring(str, start, end) {
  start = start || 0;
  end = end === undefined ? Infinity : end || 0;

  if (start < 0) {
    start = 0;
  }
  if (end < 0) {
    end = 0;
  }

  var length = Math.abs(end - start);
  start = start < end ? start : end;
  return substr(str, start, length);
}

/**
 * Get a list of Unicode code-points from a String
 *
 * @param {string} str        Valid Unicode string
 * @return {array<number>}    A list of code-points in [0..0x10FFFF]
 */
function getCodePoints(str) {
  var codePoints = [];
  for (var pos = 0; pos < str.length; pos += getUTF16Length(str, pos)) {
    codePoints.push(str.codePointAt(pos));
  }
  return codePoints;
}

var UnicodeUtils = {
  getCodePoints: getCodePoints,
  getUTF16Length: getUTF16Length,
  hasSurrogateUnit: hasSurrogateUnit,
  isCodeUnitInSurrogateRange: isCodeUnitInSurrogateRange,
  isSurrogatePair: isSurrogatePair,
  strlen: strlen,
  substring: substring,
  substr: substr
};

module.exports = UnicodeUtils;
node_modules 2017-05-03 15:35:00 +02:00			`/**`
			`* Copyright (c) 2013-present, Facebook, Inc.`
			`*`
update dependencies 2017-10-14 18:40:54 +02:00			`* This source code is licensed under the MIT license found in the`
			`* LICENSE file in the root directory of this source tree.`
node_modules 2017-05-03 15:35:00 +02:00			`*`
			`* @typechecks`
			`*/`

			`/**`
			`* Unicode-enabled replacesments for basic String functions.`
			`*`
			`* All the functions in this module assume that the input string is a valid`
			`* UTF-16 encoding of a Unicode sequence. If it's not the case, the behavior`
			`* will be undefined.`
			`*`
			`* WARNING: Since this module is typechecks-enforced, you may find new bugs`
			`* when replacing normal String functions with ones provided here.`
			`*/`

			`'use strict';`

			`var invariant = require('./invariant');`

			`// These two ranges are consecutive so anything in [HIGH_START, LOW_END] is a`
			`// surrogate code unit.`
			`var SURROGATE_HIGH_START = 0xD800;`
			`var SURROGATE_HIGH_END = 0xDBFF;`
			`var SURROGATE_LOW_START = 0xDC00;`
			`var SURROGATE_LOW_END = 0xDFFF;`
			`var SURROGATE_UNITS_REGEX = /[\uD800-\uDFFF]/;`

			`/**`
			`* @param {number} codeUnit A Unicode code-unit, in range [0, 0x10FFFF]`
			`* @return {boolean} Whether code-unit is in a surrogate (hi/low) range`
			`*/`
			`function isCodeUnitInSurrogateRange(codeUnit) {`
			`return SURROGATE_HIGH_START <= codeUnit && codeUnit <= SURROGATE_LOW_END;`
			`}`

			`/**`
			* Returns whether the two characters starting at `index` form a surrogate pair.
			`* For example, given the string s = "\uD83D\uDE0A", (s, 0) returns true and`
			`* (s, 1) returns false.`
			`*`
			`* @param {string} str`
			`* @param {number} index`
			`* @return {boolean}`
			`*/`
			`function isSurrogatePair(str, index) {`
			`!(0 <= index && index < str.length) ? process.env.NODE_ENV !== 'production' ? invariant(false, 'isSurrogatePair: Invalid index %s for string length %s.', index, str.length) : invariant(false) : void 0;`
			`if (index + 1 === str.length) {`
			`return false;`
			`}`
			`var first = str.charCodeAt(index);`
			`var second = str.charCodeAt(index + 1);`
			`return SURROGATE_HIGH_START <= first && first <= SURROGATE_HIGH_END && SURROGATE_LOW_START <= second && second <= SURROGATE_LOW_END;`
			`}`

			`/**`
			`* @param {string} str Non-empty string`
			`* @return {boolean} True if the input includes any surrogate code units`
			`*/`
			`function hasSurrogateUnit(str) {`
			`return SURROGATE_UNITS_REGEX.test(str);`
			`}`

			`/**`
			`* Return the length of the original Unicode character at given position in the`
			`* String by looking into the UTF-16 code unit; that is equal to 1 for any`
			`* non-surrogate characters in BMP ([U+0000..U+D7FF] and [U+E000, U+FFFF]); and`
			`* returns 2 for the hi/low surrogates ([U+D800..U+DFFF]), which are in fact`
			`* representing non-BMP characters ([U+10000..U+10FFFF]).`
			`*`
			`* Examples:`
			`* - '\u0020' => 1`
			`* - '\u3020' => 1`
			`* - '\uD835' => 2`
			`* - '\uD835\uDDEF' => 2`
			`* - '\uDDEF' => 2`
			`*`
			`* @param {string} str Non-empty string`
			`* @param {number} pos Position in the string to look for one code unit`
			`* @return {number} Number 1 or 2`
			`*/`
			`function getUTF16Length(str, pos) {`
			`return 1 + isCodeUnitInSurrogateRange(str.charCodeAt(pos));`
			`}`

			`/**`
			`* Fully Unicode-enabled replacement for String#length`
			`*`
			`* @param {string} str Valid Unicode string`
			`* @return {number} The number of Unicode characters in the string`
			`*/`
			`function strlen(str) {`
			`// Call the native functions if there's no surrogate char`
			`if (!hasSurrogateUnit(str)) {`
			`return str.length;`
			`}`

			`var len = 0;`
			`for (var pos = 0; pos < str.length; pos += getUTF16Length(str, pos)) {`
			`len++;`
			`}`
			`return len;`
			`}`

			`/**`
			`* Fully Unicode-enabled replacement for String#substr()`
			`*`
			`* @param {string} str Valid Unicode string`
			`* @param {number} start Location in Unicode sequence to begin extracting`
			`* @param {?number} length The number of Unicode characters to extract`
			`* (default: to the end of the string)`
			`* @return {string} Extracted sub-string`
			`*/`
			`function substr(str, start, length) {`
			`start = start \|\| 0;`
			`length = length === undefined ? Infinity : length \|\| 0;`

			`// Call the native functions if there's no surrogate char`
			`if (!hasSurrogateUnit(str)) {`
			`return str.substr(start, length);`
			`}`

			`// Obvious cases`
			`var size = str.length;`
			`if (size <= 0 \|\| start > size \|\| length <= 0) {`
			`return '';`
			`}`

			`// Find the actual starting position`
			`var posA = 0;`
			`if (start > 0) {`
			`for (; start > 0 && posA < size; start--) {`
			`posA += getUTF16Length(str, posA);`
			`}`
			`if (posA >= size) {`
			`return '';`
			`}`
			`} else if (start < 0) {`
			`for (posA = size; start < 0 && 0 < posA; start++) {`
			`posA -= getUTF16Length(str, posA - 1);`
			`}`
			`if (posA < 0) {`
			`posA = 0;`
			`}`
			`}`

			`// Find the actual ending position`
			`var posB = size;`
			`if (length < size) {`
			`for (posB = posA; length > 0 && posB < size; length--) {`
			`posB += getUTF16Length(str, posB);`
			`}`
			`}`

			`return str.substring(posA, posB);`
			`}`

			`/**`
			`* Fully Unicode-enabled replacement for String#substring()`
			`*`
			`* @param {string} str Valid Unicode string`
			`* @param {number} start Location in Unicode sequence to begin extracting`
			`* @param {?number} end Location in Unicode sequence to end extracting`
			`* (default: end of the string)`
			`* @return {string} Extracted sub-string`
			`*/`
			`function substring(str, start, end) {`
			`start = start \|\| 0;`
			`end = end === undefined ? Infinity : end \|\| 0;`

			`if (start < 0) {`
			`start = 0;`
			`}`
			`if (end < 0) {`
			`end = 0;`
			`}`

			`var length = Math.abs(end - start);`
			`start = start < end ? start : end;`
			`return substr(str, start, length);`
			`}`

			`/**`
			`* Get a list of Unicode code-points from a String`
			`*`
			`* @param {string} str Valid Unicode string`
			`* @return {array<number>} A list of code-points in [0..0x10FFFF]`
			`*/`
			`function getCodePoints(str) {`
			`var codePoints = [];`
			`for (var pos = 0; pos < str.length; pos += getUTF16Length(str, pos)) {`
			`codePoints.push(str.codePointAt(pos));`
			`}`
			`return codePoints;`
			`}`

			`var UnicodeUtils = {`
			`getCodePoints: getCodePoints,`
			`getUTF16Length: getUTF16Length,`
			`hasSurrogateUnit: hasSurrogateUnit,`
			`isCodeUnitInSurrogateRange: isCodeUnitInSurrogateRange,`
			`isSurrogatePair: isSurrogatePair,`
			`strlen: strlen,`
			`substring: substring,`
			`substr: substr`
			`};`

			`module.exports = UnicodeUtils;`