feat: optimised-glob util

This commit is contained in:
Thijs Louisse 2024-05-08 14:10:04 +02:00
parent 8fd9ec4df1
commit 4bef4c2661
2 changed files with 550 additions and 0 deletions

View file

@ -0,0 +1,276 @@
/* eslint-disable no-case-declarations */
/* eslint-disable no-fallthrough */
import nodeFs from 'fs';
import path from 'path';
import { toPosixPath } from './to-posix-path.js';
// This module reads directories via `fs.promises.readdir(dir, { recursive: true })`.
// The `recursive` option only exists from Node.js 18.17 (18.x line) and 20.1 onwards, so a check
// on the major version alone would let unsupported runtimes through.
const [nodeMajor, nodeMinor] = process.versions.node.split('.').map(Number);
const supportsRecursiveReaddir =
  nodeMajor > 20 || (nodeMajor === 20 && nodeMinor >= 1) || (nodeMajor === 18 && nodeMinor >= 17);
if (!supportsRecursiveReaddir) {
  throw new Error('[optimisedGlob] Node.js version 18.17 (or 20.1) or higher is required');
}
/**
* @typedef {import('memfs').Volume|nodeFs} FsLike
* @typedef {{onlyDirectories:boolean;onlyFiles:boolean;deep:number;suppressErrors:boolean;fs: FsLike;cwd:string;absolute:boolean;extglob:boolean;}} FastGlobtions
*/
/**
 * Converts a glob into a regular expression that has to match a complete (posix style) path.
 *
 * @param {string} glob
 * @param {{globstar?:boolean; extglob?:boolean}} [providedOpts]
 * @param {boolean} [providedOpts.globstar=true] if true, '/foo/*' => '^\/foo\/([^/]*)$' (not allowing folders inside *), else '/foo/*' => '^\/foo\/.*$'
 * @param {boolean} [providedOpts.extglob=true] if true, supports so called "extended" globs (like bash): single character matching ("?"),
 * character ranges ("[a-z]") and group matching ("{a,b}"). If false, these characters are matched literally.
 * @returns {RegExp}
 * @throws {TypeError} when `glob` is not a string
 */
export function parseGlobToRegex(glob, providedOpts) {
  if (typeof glob !== 'string') throw new TypeError('Expected a string');

  const options = {
    globstar: true,
    extglob: true,
    ...providedOpts,
  };

  // Regex metacharacters that never have a glob meaning here: always matched literally
  const charsToEscape = ['/', '$', '^', '+', '.', '(', ')', '=', '!', '|'];
  // Characters that only carry a glob meaning when `extglob` is enabled
  const extglobChars = ['?', '[', ']', '{', '}'];

  let regexResultStr = '';
  let isInGroup = false;

  for (let i = 0; i < glob.length; i += 1) {
    const currentChar = glob[i];

    if (charsToEscape.includes(currentChar)) {
      regexResultStr += `\\${currentChar}`;
      continue; // eslint-disable-line no-continue
    }

    if (extglobChars.includes(currentChar)) {
      if (!options.extglob) {
        // Without extglob these are plain characters. Emitting them unescaped would turn "?" into
        // a quantifier and "[" into an (often unterminated, thus invalid) character class.
        regexResultStr += `\\${currentChar}`;
      } else if (currentChar === '?') {
        regexResultStr += '.';
      } else if (currentChar === '{') {
        isInGroup = true;
        regexResultStr += '(';
      } else if (currentChar === '}') {
        isInGroup = false;
        regexResultStr += ')';
      } else {
        // "[" and "]" map one-on-one to a regex character class
        regexResultStr += currentChar;
      }
      continue; // eslint-disable-line no-continue
    }

    if (currentChar === ',') {
      // Inside "{a,b}" a comma separates alternatives, elsewhere it is a literal
      regexResultStr += isInGroup ? '|' : `\\${currentChar}`;
      continue; // eslint-disable-line no-continue
    }

    if (currentChar === '*') {
      const prevChar = glob[i - 1];
      let isMultiStar = false;
      // Consume all consecutive stars, so they are handled as one token
      while (glob[i + 1] === '*') {
        isMultiStar = true;
        i += 1;
      }
      const nextChar = glob[i + 1];
      if (!options.globstar) {
        // Treat any number of "*" as one
        regexResultStr += '.*';
      } else {
        // A globstar must fill a complete path segment: surrounded by "/" or a boundary of the glob
        const isGlobstarSegment =
          isMultiStar && ['/', undefined].includes(prevChar) && ['/', undefined].includes(nextChar);
        if (isGlobstarSegment) {
          // Match zero or more path segments
          regexResultStr += '((?:[^/]*(?:/|$))*)';
          // Move over the "/"
          i += 1;
        } else {
          // Only match one path segment
          regexResultStr += '([^/]*)';
        }
      }
      continue; // eslint-disable-line no-continue
    }

    regexResultStr += currentChar;
  }

  return new RegExp(`^${regexResultStr}$`);
}
/**
 * Returns the leading "static" folders of a glob, joined by "/".
 * Collecting stops at the first path segment that contains one of the reserved characters
 * (note that "." is among them, so a segment like "file.js" ends the start path as well).
 * Empty segments (leading or doubled slashes) are left out of the result.
 * @param {string} glob
 * @returns {string}
 */
function getStartPath(glob) {
  const reservedChars = ['?', '[', ']', '{', '}', ',', '.', '*'];
  const staticSegments = [];

  for (const segment of glob.split('/')) {
    const isDynamicSegment = reservedChars.some(reservedChar => segment.includes(reservedChar));
    // Everything from the first dynamic segment onwards is not part of the start path
    if (isDynamicSegment) break;
    if (segment) staticSegments.push(segment);
  }

  return staticSegments.join('/');
}
let isCacheEnabled = false;
/** @type {{[path:string]:nodeFs.Dirent[]}} */
const cache = {};

/**
 * Recursively reads all directory entries (as Dirents) below `fullStartPath`.
 * When the cache is enabled, a previous result for the same path is returned synchronously and
 * new results are stored; when it is disabled, nothing is read from or written to the cache
 * (so no Dirent arrays are retained needlessly).
 * @param {string} fullStartPath absolute path of the folder to read
 * @param {{fs?:FsLike}} [providedOptions] `fs` defaults to Node's own fs module
 * @returns {Promise<nodeFs.Dirent[]>|nodeFs.Dirent[]} rejects with the error of `readdir` when reading fails
 */
function getAllFilesFromStartPath(fullStartPath, { fs = /** @type {* & FsLike} */ (nodeFs) } = {}) {
  if (isCacheEnabled && cache[fullStartPath]) return cache[fullStartPath];

  // `readdir` already returns a promise: return its chain instead of wrapping it in a new Promise
  return fs.promises
    .readdir(fullStartPath, { withFileTypes: true, recursive: true })
    .then((/** @type {* & nodeFs.Dirent[]} */ files) => {
      if (isCacheEnabled) {
        cache[fullStartPath] = files;
      }
      return files;
    });
}
/**
 * Lightweight glob implementation.
 * It's a drop-in replacement for globby, but it's faster, a few hundred lines of code and has no dependencies.
 *
 * Globs starting with "!" are negations: they never add results, they only exclude paths that
 * were matched by the other globs.
 *
 * Options that are read by this function:
 * - `cwd`: folder that globs and results are relative to (default: `process.cwd()`)
 * - `fs`: fs implementation used for reading folders (default: Node's fs)
 * - `onlyFiles` (default true) / `onlyDirectories` (default false): type of entries returned.
 *   Setting `onlyFiles` to false switches `onlyDirectories` on.
 * - `deep`: maximum depth of results, counted from the static start folder of each glob
 * - `dot`: when false (default), paths containing a segment that starts with "." are left out
 * - `absolute`: when true, results are prefixed with `cwd`
 * - `unique`: when true (default), duplicate results are removed
 * - `suppressErrors`: when true (default), errors while reading the file system are ignored
 * - `globstar` / `extglob`: forwarded to `parseGlobToRegex`
 *
 * Results are sorted by amount of path segments first, alphabetically second.
 *
 * @param {string|string[]} globOrGlobs
 * @param {Partial<FastGlobtions>} providedOptions
 * @returns {Promise<string[]>}
 */
export async function optimisedGlob(globOrGlobs, providedOptions = {}) {
  const options = {
    fs: /** @type {* & FsLike} */ (nodeFs),
    onlyDirectories: false,
    suppressErrors: true,
    cwd: process.cwd(),
    absolute: false,
    onlyFiles: true,
    deep: Infinity,
    globstar: true,
    extglob: true,
    unique: true,
    sync: false,
    dot: false,
    // TODO: ignore, throwErrorOnBrokenSymbolicLink, markDirectories, objectMode, onlyDirectories, onlyFiles, stats
    // https://github.com/mrmlnc/fast-glob?tab=readme-ov-file
    ...providedOptions,
  };

  if (!options.onlyFiles) {
    // This makes behavior aligned with globby
    options.onlyDirectories = true;
  }

  const globs = Array.isArray(globOrGlobs) ? globOrGlobs : [globOrGlobs];

  /** @type {RegExp[]} */
  const matchRegexesNegative = [];
  /** @type {RegExp[]} */
  const matchRegexes = [];
  /** @type {{dirent:nodeFs.Dirent;relativeToCwdPath:string}[]} */
  const globEntries = [];

  for (const glob of globs) {
    const isNegative = glob.startsWith('!');
    // Relative paths like './my/folder/**/*.js' are changed to 'my/folder/**/*.js'.
    // The "!" is stripped first, so that negated globs like '!./my/folder/**/*.js' are normalized too.
    const globNormalized = (isNegative ? glob.slice(1) : glob).replace(/^\.\//, '');

    const regexForGlob = parseGlobToRegex(globNormalized, {
      globstar: options.globstar,
      extglob: options.extglob,
    });

    if (isNegative) {
      matchRegexesNegative.push(regexForGlob);
      // A negated glob only filters: every path it can exclude is already delivered by the
      // positive glob that matched it, so there is no need to read the file system for it.
      continue; // eslint-disable-line no-continue
    }
    matchRegexes.push(regexForGlob);

    // Search for the "deepest" starting point in the filesystem that we can use to search the fs
    const startPath = getStartPath(globNormalized);
    // `deep` is counted from the start folder of the glob, not from cwd
    const startDepth = startPath ? startPath.split('/').length : 0;
    const fullStartPath = path.join(options.cwd, startPath);

    try {
      const allDirentsRelativeToStartPath = await getAllFilesFromStartPath(fullStartPath, {
        fs: options.fs,
      });

      for (const dirent of allDirentsRelativeToStartPath) {
        // `parentPath` is the successor of the deprecated `path` property of Dirent;
        // fall back to `path` for runtimes / fs implementations that only provide the latter
        const { parentPath, path: legacyParentPath } = /** @type {* & nodeFs.Dirent} */ (dirent);
        const relativeToCwdPath = toPosixPath(
          path.join(parentPath ?? legacyParentPath, dirent.name),
        ).replace(`${options.cwd}/`, '');

        // Done on the cwd-relative path (before `absolute` is applied), so that the segments of
        // cwd never count towards the depth. With the default (Infinity) everything passes.
        const depthFromStartPath = relativeToCwdPath.split('/').length - startDepth;
        if (depthFromStartPath <= options.deep) {
          globEntries.push({ dirent, relativeToCwdPath });
        }
      }
    } catch (e) {
      if (!options.suppressErrors) {
        throw e;
      }
    }
  }

  // TODO: for perf, combine options checks instead of doing multiple filters and maps
  const matchedEntries = globEntries.filter(
    globEntry =>
      matchRegexes.some(globRe => globRe.test(globEntry.relativeToCwdPath)) &&
      !matchRegexesNegative.some(globReNeg => globReNeg.test(globEntry.relativeToCwdPath)),
  );

  const allFileOrDirectoryEntries = matchedEntries.filter(({ dirent }) =>
    options.onlyDirectories ? dirent.isDirectory() : dirent.isFile(),
  );

  let filteredPaths = allFileOrDirectoryEntries.map(({ relativeToCwdPath }) => relativeToCwdPath);

  if (!options.dot) {
    // A hidden folder anywhere in the path hides everything below it
    filteredPaths = filteredPaths.filter(
      f => !f.split('/').some(folderOrFile => folderOrFile.startsWith('.')),
    );
  }

  if (options.absolute) {
    filteredPaths = filteredPaths.map(f => path.posix.join(options.cwd, f));

    if (process.platform === 'win32') {
      const driveLetter = path.win32.resolve(options.cwd).slice(0, 1).toUpperCase();
      filteredPaths = filteredPaths.map(f => `${driveLetter}:${f}`);
    }
  }

  const result = options.unique ? Array.from(new Set(filteredPaths)) : filteredPaths;

  // Shallow paths first; paths of equal depth alphabetically
  return result.sort((a, b) => {
    const pathDiff = a.split('/').length - b.split('/').length;
    return pathDiff !== 0 ? pathDiff : a.localeCompare(b);
  });
}

/**
 * Switches off the cache for folder reads.
 */
optimisedGlob.disableCache = () => {
  isCacheEnabled = false;
};

View file

@ -0,0 +1,274 @@
import { globby } from 'globby';
// eslint-disable-next-line import/no-extraneous-dependencies
import { expect } from 'chai';
// import { vol } from 'memfs';
// eslint-disable-next-line import/no-extraneous-dependencies
import mockFs from 'mock-fs';
import { optimisedGlob } from '../../../src/program/utils/optimised-glob.js';
// Pass `--measure-perf` on the command line to log timing differences with globby
const measurePerf = process.argv.includes('--measure-perf');

/**
 * Runs `optimisedGlob` and `globby` with the same patterns and options and asserts that both
 * give deep-equal results. Performance marks are set around both calls; when `--measure-perf`
 * was passed, the difference in duration is logged via `console.debug`.
 * @param {*} patterns glob or globs, handed as-is to both implementations
 * @param {*} options options, handed as-is to both implementations
 * @returns {Promise<string[]>} the result of `optimisedGlob`
 */
async function runOptimisedGlobAndCheckGlobbyParity(patterns, options) {
  performance.mark('start-optimisedGlob');
  const optimisedGlobResult = await optimisedGlob(patterns, options);
  performance.mark('end-optimisedGlob');

  performance.mark('start-globby');
  const globbyResult = await globby(patterns, options);
  performance.mark('end-globby');

  if (measurePerf) {
    const { duration: optimisedGlobDuration } = performance.measure(
      'optimisedGlob',
      'start-optimisedGlob',
      'end-optimisedGlob',
    );
    const { duration: globbyDuration } = performance.measure('globby', 'start-globby', 'end-globby');
    const gainedMs = globbyDuration - optimisedGlobDuration;
    console.debug(`optimisedGlob was ${gainedMs}ms quicker than globby.`);
  }

  expect(optimisedGlobResult).to.deep.equal(globbyResult);
  return optimisedGlobResult;
}
// Every test (except the skipped one) runs via `runOptimisedGlobAndCheckGlobbyParity`, so next to
// the explicit expectations, parity with globby is asserted as well.
describe('optimisedGlob', () => {
  const testCfg = {
    cwd: '/fakeFs',
    // fs: vol,
  };

  beforeEach(() => {
    // Same four files ("file" and "anotherFile", as .js and .d.ts) in a "some" folder on four
    // nesting levels, plus one hidden file
    const fakeFs = {
      '/fakeFs/my/folder/some/file.js': 'content',
      '/fakeFs/my/folder/lvl1/some/file.js': 'content',
      '/fakeFs/my/folder/lvl1/lvl2/some/file.js': 'content',
      '/fakeFs/my/folder/lvl1/lvl2/lvl3/some/file.js': 'content',
      '/fakeFs/my/folder/some/file.d.ts': 'content',
      '/fakeFs/my/folder/lvl1/some/file.d.ts': 'content',
      '/fakeFs/my/folder/lvl1/lvl2/some/file.d.ts': 'content',
      '/fakeFs/my/folder/lvl1/lvl2/lvl3/some/file.d.ts': 'content',
      '/fakeFs/my/folder/some/anotherFile.js': 'content',
      '/fakeFs/my/folder/lvl1/some/anotherFile.js': 'content',
      '/fakeFs/my/folder/lvl1/lvl2/some/anotherFile.js': 'content',
      '/fakeFs/my/folder/lvl1/lvl2/lvl3/some/anotherFile.js': 'content',
      '/fakeFs/my/folder/some/anotherFile.d.ts': 'content',
      '/fakeFs/my/folder/lvl1/some/anotherFile.d.ts': 'content',
      '/fakeFs/my/folder/lvl1/lvl2/some/anotherFile.d.ts': 'content',
      '/fakeFs/my/folder/lvl1/lvl2/lvl3/some/anotherFile.d.ts': 'content',
      '/fakeFs/my/.hiddenFile.js': 'content',
    };
    // vol.fromJSON(fakeFs);
    mockFs(fakeFs);
  });

  afterEach(() => {
    // vol.reset();
    mockFs.restore();
  });

  describe('Star patterns', () => {
    it('supports double asterisk like "my/folder/**/some/file.js" ', async () => {
      const files = await runOptimisedGlobAndCheckGlobbyParity(
        'my/folder/**/some/file.js',
        testCfg,
      );
      expect(files).to.deep.equal([
        'my/folder/some/file.js',
        'my/folder/lvl1/some/file.js',
        'my/folder/lvl1/lvl2/some/file.js',
        'my/folder/lvl1/lvl2/lvl3/some/file.js',
      ]);
    });

    it('supports single asterisk like "my/folder/*/some/file.js" ', async () => {
      const files = await runOptimisedGlobAndCheckGlobbyParity('my/folder/*/some/file.js', testCfg);
      expect(files).to.deep.equal(['my/folder/lvl1/some/file.js']);
    });

    it('supports filenames like "my/folder/lvl1/some/*il*.js" ', async () => {
      const files = await runOptimisedGlobAndCheckGlobbyParity(
        'my/folder/lvl1/some/*il*.js',
        testCfg,
      );
      expect(files).to.deep.equal([
        'my/folder/lvl1/some/anotherFile.js',
        'my/folder/lvl1/some/file.js',
      ]);
    });

    it('supports globs starting with a star like "**/some/file.js" ', async () => {
      const filesDoubleStar = await runOptimisedGlobAndCheckGlobbyParity(
        '**/some/file.js',
        testCfg,
      );
      expect(filesDoubleStar).to.deep.equal([
        'my/folder/some/file.js',
        'my/folder/lvl1/some/file.js',
        'my/folder/lvl1/lvl2/some/file.js',
        'my/folder/lvl1/lvl2/lvl3/some/file.js',
      ]);

      const filesSingleStar = await runOptimisedGlobAndCheckGlobbyParity(
        '*/folder/some/file.js',
        testCfg,
      );
      expect(filesSingleStar).to.deep.equal(['my/folder/some/file.js']);
    });

    it('gives empty output when location does not exist" ', async () => {
      const files = await runOptimisedGlobAndCheckGlobbyParity('my/folder/**/some/file.js', {
        ...testCfg,
        cwd: '/nonExisting/path', // this will not exist
      });
      expect(files).to.deep.equal([]);
    });

    it('omits hidden files" ', async () => {
      const files = await runOptimisedGlobAndCheckGlobbyParity('*/*/*/*', testCfg);
      expect(files).to.deep.equal([
        'my/folder/some/anotherFile.d.ts',
        'my/folder/some/anotherFile.js',
        'my/folder/some/file.d.ts',
        'my/folder/some/file.js',
      ]);
    });
  });

  describe('Accolade patterns', () => {
    it('works with filenames like "my/folder/*/some/file.{js,d.ts}" ', async () => {
      const files = await runOptimisedGlobAndCheckGlobbyParity(
        'my/folder/*/some/file.{js,d.ts}',
        testCfg,
      );
      expect(files).to.deep.equal(['my/folder/lvl1/some/file.d.ts', 'my/folder/lvl1/some/file.js']);
    });
  });

  describe('Multiple globs', () => {
    it('accepts an array of globs, like ["my/folder/*/some/file.js", "my/folder/lvl1/*/some/file.js"]', async () => {
      const files = await runOptimisedGlobAndCheckGlobbyParity(
        ['my/folder/*/some/file.js', 'my/folder/lvl1/*/some/file.js'],
        testCfg,
      );
      expect(files).to.deep.equal([
        'my/folder/lvl1/some/file.js',
        'my/folder/lvl1/lvl2/some/file.js',
      ]);
    });

    it('accepts nedgated globs, like ["my/folder/**/some/file.js", "!my/folder/*/some/file.js"]', async () => {
      const files = await runOptimisedGlobAndCheckGlobbyParity(
        ['my/folder/**/some/file.js', '!my/folder/*/some/file.js'],
        testCfg,
      );
      expect(files).to.deep.equal([
        'my/folder/some/file.js',
        'my/folder/lvl1/lvl2/some/file.js',
        'my/folder/lvl1/lvl2/lvl3/some/file.js',
      ]);
    });
  });

  describe('Options', () => {
    it('"absolute" returns full system paths', async () => {
      const files = await runOptimisedGlobAndCheckGlobbyParity('my/folder/*/some/file.{js,d.ts}', {
        ...testCfg,
        absolute: true,
      });
      expect(files).to.deep.equal([
        '/fakeFs/my/folder/lvl1/some/file.d.ts',
        '/fakeFs/my/folder/lvl1/some/file.js',
      ]);
    });

    it('"cwd" changes relative starting point of glob', async () => {
      const files = await runOptimisedGlobAndCheckGlobbyParity('folder/*/some/file.{js,d.ts}', {
        ...testCfg,
        cwd: '/fakeFs/my',
      });
      expect(files).to.deep.equal(['folder/lvl1/some/file.d.ts', 'folder/lvl1/some/file.js']);
    });

    it('"onlyDirectories" returns only directories/folders', async () => {
      const files = await runOptimisedGlobAndCheckGlobbyParity('my/folder/*/some', {
        ...testCfg,
        onlyDirectories: true,
      });
      expect(files).to.deep.equal(['my/folder/lvl1/some']);
    });

    it('"onlyFiles" returns only files', async () => {
      const files = await runOptimisedGlobAndCheckGlobbyParity('my/folder/*/some', {
        ...testCfg,
        onlyFiles: true,
      });
      expect(files).to.deep.equal([]);
    });

    it('"deep" limits the level of results', async () => {
      const files = await runOptimisedGlobAndCheckGlobbyParity('my/folder/**', {
        ...testCfg,
        onlyDirectories: true,
        deep: 1,
      });
      expect(files).to.deep.equal(['my/folder/lvl1', 'my/folder/some']);

      const files2 = await runOptimisedGlobAndCheckGlobbyParity('my/folder/**', {
        ...testCfg,
        onlyDirectories: true,
        deep: 2,
      });
      expect(files2).to.deep.equal([
        'my/folder/lvl1',
        'my/folder/some',
        'my/folder/lvl1/lvl2',
        'my/folder/lvl1/some',
      ]);
    });

    it('"dot" allows hidden files" ', async () => {
      const files = await runOptimisedGlobAndCheckGlobbyParity('*/*', { ...testCfg, dot: true });
      expect(files).to.deep.equal(['my/.hiddenFile.js']);
    });

    // NOTE(review): kept skipped, like it was. The body has been corrected though: an async
    // function never throws synchronously, so `expect(async () => ...).to.throw()` could not
    // observe the rejection. Consider enabling this test after confirming it passes.
    it.skip('"suppressErrors" throws errors when paths do not exist', async () => {
      let caughtError;
      try {
        await optimisedGlob('my/folder/**/some/file.js', {
          ...testCfg,
          cwd: '/nonExisting/path', // this will not exist
          suppressErrors: false,
        });
      } catch (e) {
        caughtError = e;
      }
      expect(caughtError).to.be.instanceOf(Error);
    });
  });
});