about summary refs log tree commit diff
path: root/node_modules/regexpu-core/rewrite-pattern.js
diff options
context:
space:
mode:
Diffstat (limited to 'node_modules/regexpu-core/rewrite-pattern.js')
-rw-r--r-- node_modules/regexpu-core/rewrite-pattern.js 193
1 files changed, 193 insertions, 0 deletions
diff --git a/node_modules/regexpu-core/rewrite-pattern.js b/node_modules/regexpu-core/rewrite-pattern.js
new file mode 100644
index 000000000..47a785433
--- /dev/null
+++ b/node_modules/regexpu-core/rewrite-pattern.js
@@ -0,0 +1,193 @@
+var generate = require('regjsgen').generate;
+var parse = require('regjsparser').parse;
+var regenerate = require('regenerate');
+var iuMappings = require('./data/iu-mappings.json');
+var ESCAPE_SETS = require('./data/character-class-escape-sets.js');
+
+// Return the entry of `ESCAPE_SETS` for the class escape `character`, picking
+// the table from the module-level `unicode` and `ignoreCase` flags.
+function getCharacterClassEscapeSet(character) {
+  var table = ESCAPE_SETS.REGULAR;
+  if (unicode) {
+    table = ignoreCase ? ESCAPE_SETS.UNICODE_IGNORE_CASE : ESCAPE_SETS.UNICODE;
+  }
+  return table[character];
+}
+
+// Own-property check that borrows the built-in `hasOwnProperty`, so it does not
+// depend on `object` providing (or not overriding) that method itself.
+var object = {};
+var hasOwnProperty = Object.prototype.hasOwnProperty;
+function has(object, property) { return hasOwnProperty.call(object, property); }
+
+// Regenerate set containing every code point (0x0 through 0x10FFFF). A clone of
+// it is the starting point for negative character classes under the `u` flag.
+var UNICODE_SET = regenerate().addRange(0x0, 0x10FFFF);
+// The same for patterns without the `u` flag: the range stops at 0xFFFF.
+// https://mths.be/es6#sec-pattern-semantics
+var BMP_SET = regenerate().addRange(0x0, 0xFFFF);
+
+// Prepare a Regenerate set containing all code points that are supposed to be
+// matched by `/./u`. https://mths.be/es6#sec-atom
+var DOT_SET_UNICODE = UNICODE_SET.clone() // all Unicode code points
+ .remove(
+ // minus `LineTerminator`s (https://mths.be/es6#sec-line-terminators):
+ 0x000A, // Line Feed <LF>
+ 0x000D, // Carriage Return <CR>
+ 0x2028, // Line Separator <LS>
+ 0x2029 // Paragraph Separator <PS>
+ );
+// Prepare a Regenerate set containing all code points that are supposed to be
+// matched by `/./` (only BMP code points): the `/./u` set restricted to BMP_SET.
+var DOT_SET = DOT_SET_UNICODE.clone()
+ .intersection(BMP_SET);
+
+// For each code point from `min` through `max`, add its `caseFold` mapping (when
+// one exists) to this set; always visits `min` once. Returns the set itself.
+regenerate.prototype.iuAddRange = function(min, max) {
+  var codePoint = min;
+  do {
+    var mapped = caseFold(codePoint);
+    if (mapped) {
+      this.add(mapped);
+    }
+  } while (++codePoint <= max);
+  return this;
+};
+
+// Copy every enumerable property of `source` (inherited ones included, since
+// no `hasOwnProperty` filter is applied) onto `target`. Returns nothing.
+function assign(target, source) {
+  var name;
+  for (name in source) { target[name] = source[name]; }
+}
+
+// Copy the parse tree of `pattern` onto the AST node `item`, in place.
+//   - A falsy `pattern` is a no-op; otherwise it is parsed with empty flags.
+//   - A tree of type `characterClass`, `group` or `value` is used as-is; any
+//     other type is first wrapped in a non-capturing group via `wrap`.
+// TODO: Test if memoizing `pattern` here is worth the effort.
+function update(item, pattern) {
+  if (!pattern) {
+    return;
+  }
+  var parsed = parse(pattern, '');
+  var kind = parsed.type;
+  var standsAlone =
+    kind === 'characterClass' || kind === 'group' || kind === 'value';
+  if (!standsAlone) {
+    parsed = wrap(parsed, pattern);
+  }
+  assign(item, parsed);
+}
+
+// Wrap `tree` in a non-capturing group node. The node has `behavior: 'ignore'`,
+// `tree` as its only `body` entry, and `raw` set to `pattern` surrounded by
+// `(?:` and `)`.
+function wrap(tree, pattern) {
+  var group = { 'type': 'group', 'behavior': 'ignore' };
+  group.body = [tree];
+  group.raw = '(?:' + pattern + ')';
+  return group;
+}
+
+function caseFold(codePoint) { // own `iuMappings` entry, or `false` if none
+  return has(iuMappings, codePoint) && iuMappings[codePoint];
+}
+
+var ignoreCase = false; // whether the current pattern's flags contain `i`
+var unicode = false; // whether the current pattern's flags contain `u`
+// Rewrite a character class node in place and return it. Its body items are
+// merged into one Regenerate set (plus `caseFold` mappings when both flags are
+// set), inverted if `negative`; `update` then swaps in that set's parse tree.
+function processCharacterClass(characterClassItem) {
+  var set = regenerate();
+  characterClassItem.body.forEach(function(item) {
+    switch (item.type) {
+      case 'value':
+        set.add(item.codePoint);
+        var folded = ignoreCase && unicode && caseFold(item.codePoint);
+        if (folded) {
+          set.add(folded);
+        }
+        break;
+      case 'characterClassRange':
+        var min = item.min.codePoint;
+        var max = item.max.codePoint;
+        set.addRange(min, max);
+        if (ignoreCase && unicode) {
+          set.iuAddRange(min, max);
+        }
+        break;
+      case 'characterClassEscape':
+        set.add(getCharacterClassEscapeSet(item.value));
+        break;
+      // Safeguard only; this clause should never be reached.
+      /* istanbul ignore next */
+      default:
+        throw Error('Unknown term type: ' + item.type);
+    }
+  });
+  if (characterClassItem.negative) {
+    set = (unicode ? UNICODE_SET : BMP_SET).clone().remove(set);
+  }
+  update(characterClassItem, set.toString());
+  return characterClassItem;
+}
+
+// Rewrite the AST node `item` according to its type and return it:
+//   - `dot`, `characterClassEscape`, `value`: contents replaced, via `update`,
+//     with the parse tree of an explicit Regenerate set's string form;
+//   - `characterClass`: handled by `processCharacterClass`;
+//   - `alternative`, `disjunction`, `group`, `quantifier`: `body` is mapped
+//     through `processTerm` recursively;
+//   - `anchor`, `empty`, `reference`: untouched. Other types throw an `Error`.
+function processTerm(item) {
+  switch (item.type) {
+    case 'dot':
+      update(item, (unicode ? DOT_SET_UNICODE : DOT_SET).toString());
+      break;
+    case 'characterClass':
+      item = processCharacterClass(item);
+      break;
+    case 'characterClassEscape':
+      update(item, getCharacterClassEscapeSet(item.value).toString());
+      break;
+    case 'alternative':
+    case 'disjunction':
+    case 'group':
+    case 'quantifier':
+      item.body = item.body.map(processTerm);
+      break;
+    case 'value':
+      var codePoint = item.codePoint;
+      var set = regenerate(codePoint);
+      if (ignoreCase && unicode) {
+        var folded = caseFold(codePoint);
+        if (folded) {
+          set.add(folded);
+        }
+      }
+      update(item, set.toString());
+      break;
+    case 'anchor':
+    case 'empty':
+    case 'reference':
+      // Nothing to do here.
+      break;
+    // The `default` clause is only here as a safeguard; it should never be
+    // reached. Code coverage tools should ignore it.
+    /* istanbul ignore next */
+    default:
+      throw Error('Unknown term type: ' + item.type);
+  }
+  return item;
+}
+
+module.exports = function(pattern, flags) { // returns the rewritten pattern
+  var ast = parse(pattern, flags);
+  ignoreCase = !!flags && flags.indexOf('i') > -1; // module-level flag state,
+  unicode = !!flags && flags.indexOf('u') > -1; // read by the helper functions
+  assign(ast, processTerm(ast));
+  return generate(ast);
+};