diff options
Diffstat (limited to 'node_modules/regexpu-core/rewrite-pattern.js')
-rw-r--r-- | node_modules/regexpu-core/rewrite-pattern.js | 193 |
1 files changed, 0 insertions, 193 deletions
// From node_modules/regexpu-core/rewrite-pattern.js (listed in this diff as a
// deleted file, mode 100644, blob 47a785433).
var generate = require('regjsgen').generate;
var parse = require('regjsparser').parse;
var regenerate = require('regenerate');
var iuMappings = require('./data/iu-mappings.json');
var ESCAPE_SETS = require('./data/character-class-escape-sets.js');

/**
 * Looks up the set for a character class escape in the table that matches the
 * current module-level flags: `UNICODE_IGNORE_CASE` when both `unicode` and
 * `ignoreCase` are set, `UNICODE` when only `unicode` is set, and `REGULAR`
 * otherwise (including `ignoreCase` without `unicode`).
 *
 * @param {string} character Key into the selected escape-set table.
 * @returns {*} Whatever the selected table holds under `character`.
 */
function getCharacterClassEscapeSet(character) {
  var table;
  if (!unicode) {
    table = ESCAPE_SETS.REGULAR;
  } else if (ignoreCase) {
    table = ESCAPE_SETS.UNICODE_IGNORE_CASE;
  } else {
    table = ESCAPE_SETS.UNICODE;
  }
  return table[character];
}

// Borrow `hasOwnProperty` from a throwaway object so that `has` keeps working
// even if the inspected object shadows or lacks that method.
var object = {};
var hasOwnProperty = object.hasOwnProperty;

/**
 * Reports whether `property` is an own (non-inherited) property of `object`.
 *
 * @param {Object} object Object to inspect.
 * @param {string|number} property Property key to look for.
 * @returns {boolean} `true` if `object` itself defines `property`.
 */
function has(object, property) {
  return hasOwnProperty.call(object, property);
}

// Set of every code point; used as the universe when negating a character
// class under the `u` flag.
var UNICODE_SET = regenerate().addRange(0x0, 0x10FFFF);
// Without the `u` flag, the range stops at 0xFFFF.
// https://mths.be/es6#sec-pattern-semantics
var BMP_SET = regenerate().addRange(0x0, 0xFFFF);

// Set of all code points that are supposed to be matched by `/./u`, i.e. every
// code point except the `LineTerminator`s. https://mths.be/es6#sec-atom and
// https://mths.be/es6#sec-line-terminators
var DOT_SET_UNICODE = UNICODE_SET.clone().remove(
  0x000A, // Line Feed <LF>
  0x000D, // Carriage Return <CR>
  0x2028, // Line Separator <LS>
  0x2029 // Paragraph Separator <PS>
);
// Set of all code points that are supposed to be matched by `/./`: the same
// as above, restricted to BMP code points.
var DOT_SET = DOT_SET_UNICODE.clone().intersection(BMP_SET);

// Add a range of code points + any case-folded code points in that range to a
// set.
-regenerate.prototype.iuAddRange = function(min, max) { - var $this = this; - do { - var folded = caseFold(min); - if (folded) { - $this.add(folded); - } - } while (++min <= max); - return $this; -}; - -function assign(target, source) { - for (var key in source) { - // Note: `hasOwnProperty` is not needed here. - target[key] = source[key]; - } -} - -function update(item, pattern) { - // TODO: Test if memoizing `pattern` here is worth the effort. - if (!pattern) { - return; - } - var tree = parse(pattern, ''); - switch (tree.type) { - case 'characterClass': - case 'group': - case 'value': - // No wrapping needed. - break; - default: - // Wrap the pattern in a non-capturing group. - tree = wrap(tree, pattern); - } - assign(item, tree); -} - -function wrap(tree, pattern) { - // Wrap the pattern in a non-capturing group. - return { - 'type': 'group', - 'behavior': 'ignore', - 'body': [tree], - 'raw': '(?:' + pattern + ')' - }; -} - -function caseFold(codePoint) { - return has(iuMappings, codePoint) ? iuMappings[codePoint] : false; -} - -var ignoreCase = false; -var unicode = false; -function processCharacterClass(characterClassItem) { - var set = regenerate(); - var body = characterClassItem.body.forEach(function(item) { - switch (item.type) { - case 'value': - set.add(item.codePoint); - if (ignoreCase && unicode) { - var folded = caseFold(item.codePoint); - if (folded) { - set.add(folded); - } - } - break; - case 'characterClassRange': - var min = item.min.codePoint; - var max = item.max.codePoint; - set.addRange(min, max); - if (ignoreCase && unicode) { - set.iuAddRange(min, max); - } - break; - case 'characterClassEscape': - set.add(getCharacterClassEscapeSet(item.value)); - break; - // The `default` clause is only here as a safeguard; it should never be - // reached. Code coverage tools should ignore it. - /* istanbul ignore next */ - default: - throw Error('Unknown term type: ' + item.type); - } - }); - if (characterClassItem.negative) { - set = (unicode ? 
UNICODE_SET : BMP_SET).clone().remove(set); - } - update(characterClassItem, set.toString()); - return characterClassItem; -} - -function processTerm(item) { - switch (item.type) { - case 'dot': - update( - item, - (unicode ? DOT_SET_UNICODE : DOT_SET).toString() - ); - break; - case 'characterClass': - item = processCharacterClass(item); - break; - case 'characterClassEscape': - update( - item, - getCharacterClassEscapeSet(item.value).toString() - ); - break; - case 'alternative': - case 'disjunction': - case 'group': - case 'quantifier': - item.body = item.body.map(processTerm); - break; - case 'value': - var codePoint = item.codePoint; - var set = regenerate(codePoint); - if (ignoreCase && unicode) { - var folded = caseFold(codePoint); - if (folded) { - set.add(folded); - } - } - update(item, set.toString()); - break; - case 'anchor': - case 'empty': - case 'group': - case 'reference': - // Nothing to do here. - break; - // The `default` clause is only here as a safeguard; it should never be - // reached. Code coverage tools should ignore it. - /* istanbul ignore next */ - default: - throw Error('Unknown term type: ' + item.type); - } - return item; -}; - -module.exports = function(pattern, flags) { - var tree = parse(pattern, flags); - ignoreCase = flags ? flags.indexOf('i') > -1 : false; - unicode = flags ? flags.indexOf('u') > -1 : false; - assign(tree, processTerm(tree)); - return generate(tree); -}; |