wallet-core/packages/taler-util/src/globbing/minimatch.ts
Florian Dold a3687d84ba
separate node entry points
Signed-off-by: Florian Dold <florian@dold.me>
2021-08-20 13:18:55 +02:00

989 lines
28 KiB
TypeScript

/*
Original work Copyright (c) Isaac Z. Schlueter and Contributors
Modified work Copyright (c) 2021 Taler Systems S.A.
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
*/
import { expand } from "./brace-expansion.js";
// Separator used to normalise patterns and file names. Starts as "/" and is
// overwritten with the platform separator when require("path") works.
let path = { sep: "/" };
try {
  const nodePath = require("path");
  path.sep = nodePath.sep;
} catch (er) {
  // require("path") failed; keep the "/" default.
}
// Sentinel object that parse() returns for a "**" path portion; matchOne()
// and makeRe() recognise it by identity (===).
const GLOBSTAR = {};
// Regexp fragments for each extglob operator, keyed by the character that
// precedes "(" in the glob. parse() stores open/close on its pattern-list
// stack when it sees "<op>(" and appends `close` at the matching ")".
const plTypes = {
"!": { open: "(?:(?!(?:", close: "))[^/]*?)" },
"?": { open: "(?:", close: ")?" },
"+": { open: "(?:", close: ")+" },
"*": { open: "(?:", close: ")*" },
"@": { open: "(?:", close: ")" },
};
// Regexp source for "?": any single thing other than /
// don't need to escape / when using new RegExp()
const qmark = "[^/]";
// Regexp source for "*": any number of non-slash characters (non-greedy)
const star = qmark + "*?";
// ** when dots are allowed. Anything goes, except .. and .
// not (^ or / followed by one or two dots followed by $ or /),
// followed by anything, any number of times.
// Used by makeRe() when options.dot is set.
const twoStarDot = "(?:(?!(?:\\/|^)(?:\\.{1,2})($|\\/)).)*?";
// ** when dots are not allowed:
// not a ^ or / followed by a dot,
// followed by anything, any number of times.
const twoStarNoDot = "(?:(?!(?:\\/|^)\\.).)*?";
// Lookup table of characters that need to be escaped in RegExp.
// parse() consults it to decide when to emit a backslash.
const reSpecials = charSet("().*{}+?[]^$\\!");
// "abc" -> { a:true, b:true, c:true }
function charSet(s: string) {
  // Mark every character (UTF-16 code unit) of `s` as present.
  const table: any = {};
  for (const ch of s.split("")) {
    table[ch] = true;
  }
  return table;
}
// Separator regexp for splitting patterns and paths into segments. A run of
// several slashes counts as a single separator, so "a//b" splits to a, b.
var slashSplit = /\/+/;
minimatch.filter = filter;
/**
 * Build a predicate that tests its first argument against `pattern` by
 * calling minimatch(); the (value, index, list) signature lets it be passed
 * straight to Array.prototype.filter.
 *
 * @param pattern Glob pattern forwarded to minimatch().
 * @param options Options forwarded to minimatch(); a falsy value is
 *   replaced by an empty object.
 * @returns A function returning the minimatch() result for its first argument.
 */
function filter(pattern: any, options: {}) {
  const effectiveOptions = options || {};
  return (p: any, i: any, list: any) => minimatch(p, pattern, effectiveOptions);
}
/**
 * Options accepted by minimatch() and the Minimatch class.
 * Every flag is optional and is treated as off when absent.
 */
interface IOptions {
/**
* Enable trace output: when set, Minimatch.make() replaces the instance's
* debug() method with console.error.
*
* @default false
*/
debug?: boolean;
/**
* Do not expand {a,b} and {1..3} brace sets.
*
* @default false
*/
nobrace?: boolean;
/**
* Disable ** matching against multiple folder names.
*
* @default false
*/
noglobstar?: boolean;
/**
* Allow patterns to match filenames starting with a period,
* even if the pattern does not explicitly have a period in that spot.
*
* @default false
*/
dot?: boolean;
/**
* Disable "extglob" style patterns like +(a|b).
*
* @default false
*/
noext?: boolean;
/**
* Perform a case-insensitive match.
*
* @default false
*/
nocase?: boolean;
/**
* When the static Minimatch.match finds no matching entries,
* return a list containing the pattern itself if this option is set.
* Otherwise, an empty list is returned if there are no matches.
*
* @default false
*/
nonull?: boolean;
/**
* If set, then patterns without slashes will be matched against
* the basename of the path if it contains slashes.
*
* @default false
*/
matchBase?: boolean;
/**
* Suppress the behavior of treating #
* at the start of a pattern as a comment.
*
* @default false
*/
nocomment?: boolean;
/**
* Suppress the behavior of treating a leading ! character as negation.
*
* @default false
*/
nonegate?: boolean;
/**
* Returns from negate expressions the same as if they were not negated.
* (Ie, true on a hit, false on a miss.)
*
* @default false
*/
flipNegate?: boolean;
}
/**
 * Test a single path against a glob pattern.
 *
 * @param p The path to test.
 * @param pattern The glob pattern.
 * @param options Matching options; defaults to an empty object.
 * @returns false for comment patterns (leading "#", unless nocomment is
 *   set); for a blank pattern, whether `p` is the empty string; otherwise
 *   the result of Minimatch.match().
 * @throws TypeError if `pattern` is not a string.
 */
export function minimatch(p: string, pattern: string, options?: IOptions) {
  if (typeof pattern !== "string") {
    throw new TypeError("glob pattern string required");
  }
  const opts = options ? options : {};
  // Comments match nothing.
  const isComment = !opts.nocomment && pattern.charAt(0) === "#";
  if (isComment) {
    return false;
  }
  // A blank pattern only matches the empty path.
  if (pattern.trim() === "") {
    return p === "";
  }
  const matcher = new Minimatch(pattern, opts);
  return matcher.match(p);
}
export class Minimatch {
options: IOptions;
pattern: string;
set: string[] | string = [];
regexp: RegExp | null | boolean = null;
negate: boolean = false;
comment: boolean = false;
empty: boolean = false;
made: boolean = false;
_made: boolean = false;
globSet: any;
globParts: any;
/**
 * Store the trimmed, slash-normalised pattern and compile it immediately.
 *
 * @param pattern The glob pattern.
 * @param options Matching options; a falsy value is replaced by {}.
 * @throws TypeError if `pattern` is not a string.
 */
constructor(pattern: string, options: IOptions) {
  if (typeof pattern !== "string") {
    throw new TypeError("glob pattern string required");
  }
  let normalized = pattern.trim();
  // windows support: the matcher works on "/", not "\"
  if (path.sep !== "/") {
    normalized = normalized.split(path.sep).join("/");
  }
  this.options = options || {};
  this.pattern = normalized;
  // build the set of per-segment matchers now.
  this.make();
}
/**
 * Trace hook. A no-op by default; make() replaces it with console.error on
 * this instance when options.debug is set.
 */
debug(...args: any[]) {}
/**
 * Compile this.pattern into this.set, one entry per brace-expanded
 * alternative, each entry being the list of per-segment matchers produced
 * by parse().
 *
 * Also sets this.comment / this.empty for patterns that can never match a
 * non-empty path, this.negate (via parseNegate), this.globSet (the
 * brace-expanded patterns) and this.globParts (those patterns split on "/").
 *
 * The work is done at most once per instance. Running it again would call
 * parseNegate() on a pattern whose leading "!" characters were already
 * stripped, which would wrongly reset this.negate to false.
 */
make() {
  // don't do it more than once.
  if (this._made) return;
  this._made = true;

  const pattern = this.pattern;
  const options = this.options;

  // comments match nothing.
  if (!options.nocomment && pattern.charAt(0) === "#") {
    this.comment = true;
    return;
  }
  // an empty pattern only matches the empty path (see match()).
  if (!pattern) {
    this.empty = true;
    return;
  }

  // step 1: figure out negation, etc.
  this.parseNegate();

  // step 2: expand braces
  var set = (this.globSet = this.braceExpand());

  if (options.debug) this.debug = console.error;
  this.debug(this.pattern, set);

  // step 3: split each expanded pattern into path portions. None of the
  // portions contains a "/" after this.
  set = this.globParts = set.map((s: any) => s.split(slashSplit));
  this.debug(this.pattern, set);

  // step 4: glob --> matcher for every portion (string, RegExp, GLOBSTAR,
  // or false when the portion could not be parsed).
  set = set.map((s: any[]) => s.map((x) => this.parse(x)));
  this.debug(this.pattern, set);

  // step 5: drop every alternative that contains an unparsable portion.
  set = set.filter((s: any) => s.indexOf(false) === -1);
  this.debug(this.pattern, set);

  this.set = set;
}
/**
 * Strip the run of leading "!" characters from this.pattern and record in
 * this.negate whether their count is odd. Does nothing when
 * options.nonegate is set.
 */
parseNegate() {
  if (this.options.nonegate) return;
  const source = this.pattern;
  // Count the "!" characters at the front of the pattern.
  let bangs = 0;
  while (bangs < source.length && source.charAt(bangs) === "!") {
    bangs++;
  }
  if (bangs > 0) {
    this.pattern = source.slice(bangs);
  }
  // Each "!" toggles negation, so only an odd count negates.
  this.negate = bangs % 2 === 1;
}
/**
 * Expand brace sets in a pattern into the list of patterns they denote.
 *
 * @param pattern Pattern to expand; defaults to this.pattern.
 * @param options Options to consult; defaults to this.options when called
 *   on a Minimatch instance, otherwise to {}.
 * @returns [pattern] unchanged when options.nobrace is set or the pattern
 *   contains no "{...}" group; otherwise the result of expand().
 * @throws TypeError if no pattern is available.
 */
braceExpand(pattern?: string, options?: IOptions) {
  let opts = options;
  if (!opts) {
    opts = this instanceof Minimatch ? this.options : {};
  }
  const source = typeof pattern === "undefined" ? this.pattern : pattern;
  if (typeof source === "undefined") {
    throw new TypeError("undefined pattern");
  }
  // shortcut: nothing to expand.
  const skipExpansion = opts.nobrace || source.match(/\{.*\}/) === null;
  if (skipExpansion) {
    return [source];
  }
  return expand(source);
}
/**
 * Parse one component (path portion) of the brace-expanded set into a
 * matcher. At this point, no pattern may contain "/" in it.
 *
 * Following the lead of Bash 4.1, note that "**" only has special meaning
 * when it is the *only* thing in a path portion. Otherwise, any series
 * of * is equivalent to a single *. Globstar behavior is enabled by
 * default, and can be disabled by setting options.noglobstar.
 *
 * @param pattern A single path portion of a glob pattern.
 * @param isSub Set when re-parsing the contents of a would-be character
 *   class; the result is then the pair [regexp source, hasMagic].
 * @returns One of:
 *   - the GLOBSTAR sentinel for "**" (unless options.noglobstar is set),
 *   - "" for an empty portion,
 *   - false if the portion contains a "/",
 *   - [source, hasMagic] when isSub is set,
 *   - the glob-unescaped literal string when nothing magic was found
 *     (hasMagic starts out true when options.nocase is set),
 *   - otherwise a RegExp anchored with ^ and $, carrying the original glob
 *     in _glob and the unanchored source in _src, or a RegExp that can
 *     never match if the generated source is invalid.
 * @throws TypeError if the pattern is longer than 64 * 1024 characters.
 */
parse(pattern: string, isSub?: boolean): RegExp | string | {} {
if (pattern.length > 1024 * 64) {
throw new TypeError("pattern is too long");
}
var options = this.options;
// shortcuts
if (!options.noglobstar && pattern === "**") return GLOBSTAR;
if (pattern === "") return "";
// regexp source built up so far
var re = "";
// with nocase, treat every portion as magic so that it becomes a RegExp
var hasMagic = !!options.nocase;
// true right after an unescaped backslash
var escaping = false;
// ? => one single character
// stack of extglob groups ("+(", "!(", ...) that are still open
var patternListStack: {
open: string;
close: string;
type: any;
start: number;
reStart: number;
reEnd?: number;
}[] = [];
// closed "!(...)" groups, post-processed after the main loop
var negativeLists = [];
// pending "?", "*", "+", "@" or "!" that may turn out to start an extglob
let stateChar: string | boolean | undefined = undefined;
var inClass = false;
// offset in `re` / in `pattern` where the currently open "[" started
var reClassStart = -1;
var classStart = -1;
// . and .. never match anything that doesn't start with .,
// even when options.dot is set.
var patternStart =
pattern.charAt(0) === "."
? "" // anything
: // not (start or / followed by . or .. followed by / or end)
options.dot
? "(?!(?:^|\\/)\\.{1,2}(?:$|\\/))"
: "(?!\\.)";
var self = this;
// Emit the regexp for a pending stateChar that did not start an extglob.
function clearStateChar() {
if (stateChar) {
// we had some state-tracking character
// that wasn't consumed by this pass.
switch (stateChar) {
case "*":
re += star;
hasMagic = true;
break;
case "?":
re += qmark;
hasMagic = true;
break;
default:
re += "\\" + stateChar;
break;
}
self.debug("clearStateChar %j %j", stateChar, re);
stateChar = false;
}
}
for (
var i = 0, len = pattern.length, c;
i < len && (c = pattern.charAt(i));
i++
) {
this.debug("%s\t%s %s %j", pattern, i, re, c);
// skip over any that are escaped.
if (escaping && reSpecials[c]) {
re += "\\" + c;
escaping = false;
continue;
}
switch (c) {
case "/":
// completely not allowed, even escaped.
// Should already be path-split by now.
return false;
case "\\":
clearStateChar();
escaping = true;
continue;
// the various stateChar values
// for the "extglob" stuff.
case "?":
case "*":
case "+":
case "@":
case "!":
this.debug("%s\t%s %s %j <-- stateChar", pattern, i, re, c);
// all of those are literals inside a class, except that
// the glob [!a] means [^a] in regexp
if (inClass) {
this.debug(" in class");
if (c === "!" && i === classStart + 1) c = "^";
re += c;
continue;
}
// if we already have a stateChar, then it means
// that there was something like ** or +? in there.
// Handle the stateChar, then proceed with this one.
self.debug("call clearStateChar %j", stateChar);
clearStateChar();
stateChar = c;
// if extglob is disabled, then +(asdf|foo) isn't a thing.
// just clear the statechar *now*, rather than even diving into
// the patternList stuff.
if (options.noext) clearStateChar();
continue;
case "(":
if (inClass) {
re += "(";
continue;
}
// a "(" without a preceding extglob operator is a literal paren
if (!stateChar) {
re += "\\(";
continue;
}
patternListStack.push({
type: stateChar,
start: i - 1,
reStart: re.length,
// @ts-ignore
open: plTypes[stateChar].open,
// @ts-ignore
close: plTypes[stateChar].close,
});
// negation is (?:(?!js)[^/]*)
re += stateChar === "!" ? "(?:(?!(?:" : "(?:";
this.debug("plType %j %j", stateChar, re);
stateChar = false;
continue;
case ")":
if (inClass || !patternListStack.length) {
re += "\\)";
continue;
}
clearStateChar();
hasMagic = true;
var pl = patternListStack.pop();
// negation is (?:(?!js)[^/]*)
// The others are (?:<pattern>)<type>
re += pl!.close;
if (pl!.type === "!") {
negativeLists.push(pl);
}
pl!.reEnd = re.length;
continue;
case "|":
if (inClass || !patternListStack.length || escaping) {
re += "\\|";
escaping = false;
continue;
}
clearStateChar();
re += "|";
continue;
// these are mostly the same in regexp and glob
case "[":
// swallow any state-tracking char before the [
clearStateChar();
if (inClass) {
re += "\\" + c;
continue;
}
inClass = true;
classStart = i;
reClassStart = re.length;
re += c;
continue;
case "]":
// a right bracket shall lose its special
// meaning and represent itself in
// a bracket expression if it occurs
// first in the list. -- POSIX.2 2.8.3.2
if (i === classStart + 1 || !inClass) {
re += "\\" + c;
escaping = false;
continue;
}
// handle the case where we left a class open.
// "[z-a]" is valid, equivalent to "\[z-a\]"
if (inClass) {
// split where the last [ was, make sure we don't have
// an invalid re. if so, re-walk the contents of the
// would-be class to re-translate any characters that
// were passed through as-is
// TODO: It would probably be faster to determine this
// without a try/catch and a new RegExp, but it's tricky
// to do safely. For now, this is safe and works.
var cs = pattern.substring(classStart + 1, i);
try {
RegExp("[" + cs + "]");
} catch (er) {
// not a valid class!
var sp = this.parse(cs, true);
re = re.substr(0, reClassStart) + "\\[" + (sp as any)[0] + "\\]";
hasMagic = hasMagic || (sp as any)[1];
inClass = false;
continue;
}
}
// finish up the class.
hasMagic = true;
inClass = false;
re += c;
continue;
default:
// swallow any state char that wasn't consumed
clearStateChar();
if (escaping) {
// no need
escaping = false;
} else if (reSpecials[c] && !(c === "^" && inClass)) {
re += "\\";
}
re += c;
} // switch
} // for
// handle the case where we left a class open.
// "[abc" is valid, equivalent to "\[abc"
if (inClass) {
// split where the last [ was, and escape it
// this is a huge pita. We now have to re-walk
// the contents of the would-be class to re-translate
// any characters that were passed through as-is
cs = pattern.substr(classStart + 1);
sp = this.parse(cs, true);
re = re.substr(0, reClassStart) + "\\[" + (sp as any)[0];
hasMagic = hasMagic || (sp as any)[1];
}
// handle the case where we had a +( thing at the *end*
// of the pattern.
// each pattern list stack adds 3 chars, and we need to go through
// and escape any | chars that were passed through as-is for the regexp.
// Go through and escape them, taking care not to double-escape any
// | chars that were already escaped.
for (pl = patternListStack.pop(); pl; pl = patternListStack.pop()) {
var tail = re.slice(pl.reStart + pl.open.length);
this.debug("setting tail", re, pl);
// maybe some even number of \, then maybe 1 \, followed by a |
tail = tail.replace(/((?:\\{2}){0,64})(\\?)\|/g, function (_, $1, $2) {
if (!$2) {
// the | isn't already escaped, so escape it.
$2 = "\\";
}
// need to escape all those slashes *again*, without escaping the
// one that we need for escaping the | character. As it works out,
// escaping an even number of slashes can be done by simply repeating
// it exactly after itself. That's why this trick works.
//
// I am sorry that you have to see this.
return $1 + $1 + $2 + "|";
});
this.debug("tail=%j\n %s", tail, tail, pl, re);
// the operator that opened the unterminated group, as plain glob syntax
var t = pl.type === "*" ? star : pl.type === "?" ? qmark : "\\" + pl.type;
hasMagic = true;
re = re.slice(0, pl.reStart) + t + "\\(" + tail;
}
// handle trailing things that only matter at the very end.
clearStateChar();
if (escaping) {
// trailing \\
re += "\\\\";
}
// only need to apply the nodot start if the re starts with
// something that could conceivably capture a dot
var addPatternStart = false;
switch (re.charAt(0)) {
case ".":
case "[":
case "(":
addPatternStart = true;
}
// Hack to work around lack of negative lookbehind in JS
// A pattern like: *.!(x).!(y|z) needs to ensure that a name
// like 'a.xyz.yz' doesn't match. So, the first negative
// lookahead, has to look ALL the way ahead, to the end of
// the pattern.
for (var n = negativeLists.length - 1; n > -1; n--) {
var nl = negativeLists[n];
// 8 is the length of the tail of the "!" close fragment, "[^/]*?)"
// preceded by one ")", i.e. ")[^/]*?)"
var nlBefore = re.slice(0, nl!.reStart);
var nlFirst = re.slice(nl!.reStart, nl!.reEnd! - 8);
var nlLast = re.slice(nl!.reEnd! - 8, nl!.reEnd);
var nlAfter = re.slice(nl!.reEnd);
nlLast += nlAfter;
// Handle nested stuff like *(*.js|!(*.json)), where open parens
// mean that we should *not* include the ) in the bit that is considered
// "after" the negated section.
var openParensBefore = nlBefore.split("(").length - 1;
var cleanAfter = nlAfter;
for (i = 0; i < openParensBefore; i++) {
cleanAfter = cleanAfter.replace(/\)[+*?]?/, "");
}
nlAfter = cleanAfter;
var dollar = "";
if (nlAfter === "" && !isSub) {
dollar = "$";
}
var newRe = nlBefore + nlFirst + nlAfter + dollar + nlLast;
re = newRe;
}
// if the re is not "" at this point, then we need to make sure
// it doesn't match against an empty path part.
// Otherwise a/* will match a/, which it should not.
if (re !== "" && hasMagic) {
re = "(?=.)" + re;
}
if (addPatternStart) {
re = patternStart + re;
}
// parsing just a piece of a larger pattern.
if (isSub) {
return [re, hasMagic];
}
// skip the regexp for non-magical patterns
// unescape anything in it, though, so that it'll be
// an exact match against a file etc.
if (!hasMagic) {
return globUnescape(pattern);
}
var flags = options.nocase ? "i" : "";
try {
var regExp = new RegExp("^" + re + "$", flags);
} catch (er) {
// If it was an invalid regular expression, then it can't match
// anything. This trick looks for a character after the end of
// the string, which is of course impossible, except in multi-line
// mode, but it's not a /m regex.
return new RegExp("$.");
}
// keep the original glob and the unanchored source; makeRe() reads _src
(regExp as any)._glob = pattern;
(regExp as any)._src = re;
return regExp;
}
/**
 * Match a list of path segments against one parsed pattern (a list of
 * per-segment matchers as produced by parse()).
 *
 * set partial to true to test if, for example,
 * "/a/b" matches the start of "/*\/b/*\/d"
 * Partial means, if you run out of file before you run
 * out of pattern, then that's fine, as long as all
 * the parts match.
 *
 * @param file The path, split into segments.
 * @param pattern The matchers for each pattern segment: a string (exact
 *   match), a RegExp, the GLOBSTAR sentinel, or false (never matches).
 * @param partial Whether running out of file before pattern is acceptable.
 * @returns A truthy value on a match and a falsy one otherwise; when the
 *   file runs out before the pattern, `partial` itself is returned.
 */
matchOne(file: string | any[], pattern: string | any[], partial: any) {
var options = this.options;
this.debug("matchOne", { this: this, file: file, pattern: pattern });
this.debug("matchOne", file.length, pattern.length);
// walk file segments (fi) and pattern segments (pi) in lockstep
for (
var fi = 0, pi = 0, fl = file.length, pl = pattern.length;
fi < fl && pi < pl;
fi++, pi++
) {
this.debug("matchOne loop");
var p = pattern[pi];
var f = file[fi];
this.debug(pattern, p, f);
// should be impossible.
// some invalid regexp stuff in the set.
if (p === false) return false;
if (p === GLOBSTAR) {
this.debug("GLOBSTAR", [pattern, p, f]);
// "**"
// a/**/b/**/c would match the following:
// a/b/x/y/z/c
// a/x/y/z/b/c
// a/b/x/b/x/c
// a/b/c
// To do this, take the rest of the pattern after
// the **, and see if it would match the file remainder.
// If so, return success.
// If not, the ** "swallows" a segment, and try again.
// This is recursively awful.
//
// a/**/b/**/c matching a/b/x/y/z/c
// - a matches a
// - doublestar
// - matchOne(b/x/y/z/c, b/**/c)
// - b matches b
// - doublestar
// - matchOne(x/y/z/c, c) -> no
// - matchOne(y/z/c, c) -> no
// - matchOne(z/c, c) -> no
// - matchOne(c, c) yes, hit
// fr: first file segment not yet swallowed; pr: pattern index after **
var fr = fi;
var pr = pi + 1;
if (pr === pl) {
this.debug("** at the end");
// a ** at the end will just swallow the rest.
// We have found a match.
// however, it will not swallow /.x, unless
// options.dot is set.
// . and .. are *never* matched by **, for explosively
// exponential reasons.
for (; fi < fl; fi++) {
if (
file[fi] === "." ||
file[fi] === ".." ||
(!options.dot && file[fi].charAt(0) === ".")
)
return false;
}
return true;
}
// ok, let's see if we can swallow whatever we can.
while (fr < fl) {
var swallowee = file[fr];
this.debug("\nglobstar while", file, fr, pattern, pr, swallowee);
// XXX remove this slice. Just pass the start index.
if (this.matchOne(file.slice(fr), pattern.slice(pr), partial)) {
this.debug("globstar found match!", fr, fl, swallowee);
// found a match.
return true;
} else {
// can't swallow "." or ".." ever.
// can only swallow ".foo" when explicitly asked.
if (
swallowee === "." ||
swallowee === ".." ||
(!options.dot && swallowee.charAt(0) === ".")
) {
this.debug("dot detected!", file, fr, pattern, pr);
break;
}
// ** swallows a segment, and continue.
this.debug("globstar swallow a segment, and continue");
fr++;
}
}
// no match was found.
// However, in partial mode, we can't say this is necessarily over.
// If the ** swallowed the whole file, the remaining pattern might still
// match segments that have not been seen yet.
if (partial) {
// ran out of file
this.debug("\n>>> no match, partial?", file, fr, pattern, pr);
if (fr === fl) return true;
}
return false;
}
// something other than **
// non-magic patterns just have to match exactly
// patterns with magic have been turned into regexps.
var hit;
if (typeof p === "string") {
if (options.nocase) {
hit = f.toLowerCase() === p.toLowerCase();
} else {
hit = f === p;
}
this.debug("string match", p, f, hit);
} else {
hit = f.match(p);
this.debug("pattern match", p, f, hit);
}
if (!hit) return false;
}
// Note: ending in / means that we'll get a final ""
// at the end of the pattern. This can only match a
// corresponding "" at the end of the file.
// If the file ends in /, then it can only match a
// a pattern that ends in /, unless the pattern just
// doesn't have any more for it. But, a/b/ should *not*
// match "a/b/*", even though "" matches against the
// [^/]*? pattern, except in partial mode, where it might
// simply not be reached yet.
// However, a/b/ should still satisfy a/*
// now either we fell off the end of the pattern, or we're done.
if (fi === fl && pi === pl) {
// ran out of pattern and filename at the same time.
// an exact hit!
return true;
} else if (fi === fl) {
// ran out of file, but still had pattern left.
// this is ok if we're doing the match as part of
// a glob fs traversal.
return partial;
} else if (pi === pl) {
// ran out of pattern, still have file left.
// this is only acceptable if we're on the very last
// empty segment of a file with a trailing slash.
// a/* should match a/b/
var emptyFileEnd = fi === fl - 1 && file[fi] === "";
return emptyFileEnd;
}
// should be unreachable.
throw new Error("wtf?");
}
/**
 * Compile `pattern` into a single regular expression by constructing a
 * Minimatch and calling its makeRe().
 *
 * @param pattern The glob pattern.
 * @param options Matching options; defaults to {}.
 */
static makeRe(pattern: string, options?: IOptions) {
  const matcher = new Minimatch(pattern, options || {});
  return matcher.makeRe();
}
/**
 * Build (and cache in this.regexp) one regular expression equivalent to the
 * whole pattern set.
 *
 * It's better to use .match(). This function shouldn't be used, really,
 * but it's pretty convenient sometimes, when you just want to work with a
 * regex.
 *
 * @returns The cached or newly built RegExp, or false when the set is empty
 *   or the combined source is not a valid regular expression.
 */
makeRe() {
  // Reuse an earlier result; false records an earlier failure.
  if (this.regexp || this.regexp === false) return this.regexp;

  // at this point, this.set is a 2d array of per-segment matchers:
  // strings, regexps, or the GLOBSTAR sentinel.
  const alternatives = this.set;
  if (!alternatives.length) {
    this.regexp = false;
    return this.regexp;
  }

  const options = this.options;
  // Source used for a "**" segment.
  let twoStar;
  if (options.noglobstar) {
    twoStar = star;
  } else if (options.dot) {
    twoStar = twoStarDot;
  } else {
    twoStar = twoStarNoDot;
  }

  // Regexp source for a single segment matcher.
  const toSource = (p: string) => {
    if (p === GLOBSTAR) return twoStar;
    if (typeof p === "string") return regExpEscape(p);
    return (p as any)._src;
  };

  const joined = (alternatives as any)
    .map((parts: string[]) => parts.map((part) => toSource(part)).join("\\/"))
    .join("|");

  // must match entire pattern
  // ending in a * or ** will make it less strict.
  let source = "^(?:" + joined + ")$";
  // can match anything, as long as it's not this.
  if (this.negate) source = "^(?!" + source + ").*$";

  const flags = options.nocase ? "i" : "";
  try {
    this.regexp = new RegExp(source, flags);
  } catch (ex) {
    this.regexp = false;
  }
  return this.regexp;
}
/**
 * Filter a list of paths down to those matching `pattern`.
 *
 * @param list Candidate paths; the input array is not modified.
 * @param pattern The glob pattern.
 * @param options Matching options; a falsy value is replaced by {}.
 * @returns A new array with the matching entries. When nothing matches and
 *   options.nonull is set, the array contains the pattern itself.
 */
static match(list: string[], pattern: string, options: IOptions) {
  const matcher = new Minimatch(pattern, options || {});
  const matched = list.filter((entry) => matcher.match(entry));
  if (matcher.options.nonull && !matched.length) {
    matched.push(pattern);
  }
  return matched;
}
/**
 * Return true if the filename matches the pattern, or false otherwise.
 *
 * Comment patterns never match, and an empty pattern matches only the
 * empty string. Otherwise the path is split into segments and tried against
 * each alternative in this.set; the first hit decides the result. A hit
 * yields `!this.negate` and no hit yields `this.negate`, unless
 * options.flipNegate is set, in which case the raw hit/miss is returned.
 *
 * With options.matchBase, an alternative consisting of a single segment is
 * matched against the basename of the path, i.e. its last non-empty
 * segment, instead of the full path.
 *
 * @param f The path to test.
 * @param partial Accept paths that only match the beginning of the pattern
 *   (see matchOne).
 */
match(f: string, partial?: boolean): boolean {
  this.debug("match", f, this.pattern);
  // short-circuit in the case of busted things.
  // comments, etc.
  if (this.comment) return false;
  if (this.empty) return f === "";
  if (f === "/" && partial) return true;

  const options = this.options;

  // windows: need to use /, not \
  if (path.sep !== "/") {
    f = f.split(path.sep).join("/");
  }

  // treat the test path as a set of pathparts.
  const files = f.split(slashSplit);
  this.debug(this.pattern, "split", files);

  // just ONE of the pattern sets in this.set needs to match
  // in order for it to be valid. If negating, then just one
  // match means that we have failed.
  // Either way, return on the first hit.
  const set = this.set;
  this.debug(this.pattern, "set", set);

  // Find the basename of the path by looking for the last non-empty
  // segment. This must walk the split segments, not the characters of the
  // path string, or matchBase would compare against a single character.
  let filename: string | undefined;
  for (let i = files.length - 1; i >= 0; i--) {
    filename = files[i];
    if (filename) break;
  }

  for (let i = 0; i < set.length; i++) {
    const pattern = set[i];
    let file: (string | undefined)[] = files;
    if (options.matchBase && pattern.length === 1) {
      file = [filename];
    }
    const hit = this.matchOne(file, pattern, partial);
    if (hit) {
      if (options.flipNegate) return true;
      return !this.negate;
    }
  }

  // didn't get any hits. this is success if it's a negative
  // pattern, failure otherwise.
  if (options.flipNegate) return false;
  return this.negate;
}
}
/**
 * Remove glob escaping: each backslash followed by a character (other than
 * a line terminator) is replaced by that character alone, e.g. \* becomes *.
 */
function globUnescape(s: string) {
  const escapedChar = /\\(.)/g;
  return s.replace(escapedChar, (_whole: string, ch: string) => ch);
}
/**
 * Escape a literal string for use inside a regular expression source by
 * prefixing every regexp-special character, comma, "#", "-" and whitespace
 * character with a backslash.
 */
function regExpEscape(s: string) {
  const special = /[-[\]{}()*+?.,\\^$|#\s]/g;
  return s.replace(special, (ch: string) => "\\" + ch);
}