diff options
author | Florian Dold <florian.dold@gmail.com> | 2017-05-28 00:38:50 +0200 |
---|---|---|
committer | Florian Dold <florian.dold@gmail.com> | 2017-05-28 00:40:43 +0200 |
commit | 7fff4499fd915bcea3fa93b1aa8b35f4fe7a6027 (patch) | |
tree | 6de9a1aebd150a23b7f8c273ec657a5d0a18fe3e /node_modules/regexpu-core/rewrite-pattern.js | |
parent | 963b7a41feb29cc4be090a2446bdfe0c1f1bcd81 (diff) |
add linting (and some initial fixes)
Diffstat (limited to 'node_modules/regexpu-core/rewrite-pattern.js')
-rw-r--r-- | node_modules/regexpu-core/rewrite-pattern.js | 193 |
1 files changed, 193 insertions, 0 deletions
diff --git a/node_modules/regexpu-core/rewrite-pattern.js b/node_modules/regexpu-core/rewrite-pattern.js new file mode 100644 index 000000000..47a785433 --- /dev/null +++ b/node_modules/regexpu-core/rewrite-pattern.js @@ -0,0 +1,193 @@ +var generate = require('regjsgen').generate; +var parse = require('regjsparser').parse; +var regenerate = require('regenerate'); +var iuMappings = require('./data/iu-mappings.json'); +var ESCAPE_SETS = require('./data/character-class-escape-sets.js'); + +function getCharacterClassEscapeSet(character) { + if (unicode) { + if (ignoreCase) { + return ESCAPE_SETS.UNICODE_IGNORE_CASE[character]; + } + return ESCAPE_SETS.UNICODE[character]; + } + return ESCAPE_SETS.REGULAR[character]; +} + +var object = {}; +var hasOwnProperty = object.hasOwnProperty; +function has(object, property) { + return hasOwnProperty.call(object, property); +} + +// Prepare a Regenerate set containing all code points, used for negative +// character classes (if any). +var UNICODE_SET = regenerate().addRange(0x0, 0x10FFFF); +// Without the `u` flag, the range stops at 0xFFFF. +// https://mths.be/es6#sec-pattern-semantics +var BMP_SET = regenerate().addRange(0x0, 0xFFFF); + +// Prepare a Regenerate set containing all code points that are supposed to be +// matched by `/./u`. https://mths.be/es6#sec-atom +var DOT_SET_UNICODE = UNICODE_SET.clone() // all Unicode code points + .remove( + // minus `LineTerminator`s (https://mths.be/es6#sec-line-terminators): + 0x000A, // Line Feed <LF> + 0x000D, // Carriage Return <CR> + 0x2028, // Line Separator <LS> + 0x2029 // Paragraph Separator <PS> + ); +// Prepare a Regenerate set containing all code points that are supposed to be +// matched by `/./` (only BMP code points). +var DOT_SET = DOT_SET_UNICODE.clone() + .intersection(BMP_SET); + +// Add a range of code points + any case-folded code points in that range to a +// set. +regenerate.prototype.iuAddRange = function(min, max) { + var $this = this; + do { + var folded = caseFold(min); + if (folded) { + $this.add(folded); + } + } while (++min <= max); + return $this; +}; + +function assign(target, source) { + for (var key in source) { + // Note: `hasOwnProperty` is not needed here. + target[key] = source[key]; + } +} + +function update(item, pattern) { + // TODO: Test if memoizing `pattern` here is worth the effort. + if (!pattern) { + return; + } + var tree = parse(pattern, ''); + switch (tree.type) { + case 'characterClass': + case 'group': + case 'value': + // No wrapping needed. + break; + default: + // Wrap the pattern in a non-capturing group. + tree = wrap(tree, pattern); + } + assign(item, tree); +} + +function wrap(tree, pattern) { + // Wrap the pattern in a non-capturing group. + return { + 'type': 'group', + 'behavior': 'ignore', + 'body': [tree], + 'raw': '(?:' + pattern + ')' + }; +} + +function caseFold(codePoint) { + return has(iuMappings, codePoint) ? iuMappings[codePoint] : false; +} + +var ignoreCase = false; +var unicode = false; +function processCharacterClass(characterClassItem) { + var set = regenerate(); + var body = characterClassItem.body.forEach(function(item) { + switch (item.type) { + case 'value': + set.add(item.codePoint); + if (ignoreCase && unicode) { + var folded = caseFold(item.codePoint); + if (folded) { + set.add(folded); + } + } + break; + case 'characterClassRange': + var min = item.min.codePoint; + var max = item.max.codePoint; + set.addRange(min, max); + if (ignoreCase && unicode) { + set.iuAddRange(min, max); + } + break; + case 'characterClassEscape': + set.add(getCharacterClassEscapeSet(item.value)); + break; + // The `default` clause is only here as a safeguard; it should never be + // reached. Code coverage tools should ignore it. + /* istanbul ignore next */ + default: + throw Error('Unknown term type: ' + item.type); + } + }); + if (characterClassItem.negative) { + set = (unicode ? UNICODE_SET : BMP_SET).clone().remove(set); + } + update(characterClassItem, set.toString()); + return characterClassItem; +} + +function processTerm(item) { + switch (item.type) { + case 'dot': + update( + item, + (unicode ? DOT_SET_UNICODE : DOT_SET).toString() + ); + break; + case 'characterClass': + item = processCharacterClass(item); + break; + case 'characterClassEscape': + update( + item, + getCharacterClassEscapeSet(item.value).toString() + ); + break; + case 'alternative': + case 'disjunction': + case 'group': + case 'quantifier': + item.body = item.body.map(processTerm); + break; + case 'value': + var codePoint = item.codePoint; + var set = regenerate(codePoint); + if (ignoreCase && unicode) { + var folded = caseFold(codePoint); + if (folded) { + set.add(folded); + } + } + update(item, set.toString()); + break; + case 'anchor': + case 'empty': + case 'group': + case 'reference': + // Nothing to do here. + break; + // The `default` clause is only here as a safeguard; it should never be + // reached. Code coverage tools should ignore it. + /* istanbul ignore next */ + default: + throw Error('Unknown term type: ' + item.type); + } + return item; +}; + +module.exports = function(pattern, flags) { + var tree = parse(pattern, flags); + ignoreCase = flags ? flags.indexOf('i') > -1 : false; + unicode = flags ? flags.indexOf('u') > -1 : false; + assign(tree, processTerm(tree)); + return generate(tree); +}; |