From 1a1bdb76dbc58370bca021199081852cbc835ccd Mon Sep 17 00:00:00 2001 From: Thijs Louisse Date: Fri, 17 May 2024 20:38:03 +0200 Subject: [PATCH] feat(providence): apply memoisation in optimised-glob --- .../src/program/utils/optimised-glob.js | 301 ++++++++++-------- 1 file changed, 165 insertions(+), 136 deletions(-) diff --git a/packages-node/providence-analytics/src/program/utils/optimised-glob.js b/packages-node/providence-analytics/src/program/utils/optimised-glob.js index 7cc3b9e03..8230758b0 100644 --- a/packages-node/providence-analytics/src/program/utils/optimised-glob.js +++ b/packages-node/providence-analytics/src/program/utils/optimised-glob.js @@ -4,159 +4,189 @@ import nodeFs from 'fs'; import path from 'path'; import { toPosixPath } from './to-posix-path.js'; +import { memoize } from './memoize.js'; /** * @typedef {nodeFs} FsLike + * @typedef {nodeFs.Dirent & {path:string;parentPath:string}} DirentWithPath * @typedef {{onlyDirectories:boolean;onlyFiles:boolean;deep:number;suppressErrors:boolean;fs: FsLike;cwd:string;absolute:boolean;extglob:boolean;}} FastGlobtions */ const [nodeMajor] = process.versions.node.split('.').map(Number); -/** - * @param {string} glob - * @param {object} [providedOpts] - * @param {boolean} [providedOpts.globstar=true] if true, '/foo/*' => '^\/foo\/[^/]*$' (not allowing folders inside *), else '/foo/*' => '^\/foo\/.*$' - * @param {boolean} [providedOpts.extglob=true] if true, supports so called "extended" globs (like bash) and single character matching, matching ranges of characters, group matching etc. - * @returns {RegExp} - */ -export function parseGlobToRegex(glob, providedOpts) { - if (typeof glob !== 'string') throw new TypeError('Expected a string'); +export const parseGlobToRegex = memoize( + /** + * @param {string} glob + * @param {object} [providedOpts] + * @param {boolean} [providedOpts.globstar=true] if true, '/foo/*' => '^\/foo\/[^/]*$' (not allowing folders inside *), else '/foo/*' => '^\/foo\/.*$' + * @param {boolean} [providedOpts.extglob=true] if true, supports so called "extended" globs (like bash) and single character matching, matching ranges of characters, group matching etc. + * @returns {RegExp} + */ + (glob, providedOpts) => { + if (typeof glob !== 'string') throw new TypeError('Expected a string'); - const options = { - globstar: true, - extglob: true, - ...providedOpts, - }; + const options = { + globstar: true, + extglob: true, + ...providedOpts, + }; - let regexResultStr = ''; - let isInGroup = false; - let currentChar; + let regexResultStr = ''; + let isInGroup = false; + let currentChar; - for (let i = 0; i < glob.length; i += 1) { - currentChar = glob[i]; + for (let i = 0; i < glob.length; i += 1) { + currentChar = glob[i]; - const charsToEscape = ['/', '$', '^', '+', '.', '(', ')', '=', '!', '|']; - if (charsToEscape.includes(currentChar)) { - regexResultStr += `\\${currentChar}`; - continue; // eslint-disable-line no-continue - } - - if (options.extglob) { - if (currentChar === '?') { - regexResultStr += '.'; + const charsToEscape = ['/', '$', '^', '+', '.', '(', ')', '=', '!', '|']; + if (charsToEscape.includes(currentChar)) { + regexResultStr += `\\${currentChar}`; continue; // eslint-disable-line no-continue } - if (['[', ']'].includes(currentChar)) { - regexResultStr += currentChar; - continue; // eslint-disable-line no-continue - } - if (currentChar === '{') { - isInGroup = true; - regexResultStr += '('; - continue; // eslint-disable-line no-continue - } - if (currentChar === '}') { - isInGroup = false; - regexResultStr += ')'; - continue; // eslint-disable-line no-continue - } - } - if (currentChar === ',') { - if (isInGroup) { - regexResultStr += '|'; - continue; // eslint-disable-line no-continue - } - regexResultStr += `\\${currentChar}`; - continue; // eslint-disable-line no-continue - } - - if (currentChar === '*') { - const prevChar = glob[i - 1]; - let isMultiStar = false; - while (glob[i + 1] === '*') { - isMultiStar = true; - i += 1; - } - const nextChar = glob[i + 1]; - if (!options.globstar) { - // Treat any number of "*" as one - regexResultStr += '.*'; - } else { - const isGlobstarSegment = - isMultiStar && ['/', undefined].includes(prevChar) && ['/', undefined].includes(nextChar); - if (isGlobstarSegment) { - // Match zero or more path segments - regexResultStr += '((?:[^/]*(?:/|$))*)'; - // Move over the "/" - i += 1; - } else { - // Only match one path segment - regexResultStr += '([^/]*)'; + if (options.extglob) { + if (currentChar === '?') { + regexResultStr += '.'; + continue; // eslint-disable-line no-continue + } + if (['[', ']'].includes(currentChar)) { + regexResultStr += currentChar; + continue; // eslint-disable-line no-continue + } + if (currentChar === '{') { + isInGroup = true; + regexResultStr += '('; + continue; // eslint-disable-line no-continue + } + if (currentChar === '}') { + isInGroup = false; + regexResultStr += ')'; + continue; // eslint-disable-line no-continue } } - continue; // eslint-disable-line no-continue + + if (currentChar === ',') { + if (isInGroup) { + regexResultStr += '|'; + continue; // eslint-disable-line no-continue + } + regexResultStr += `\\${currentChar}`; + continue; // eslint-disable-line no-continue + } + + if (currentChar === '*') { + const prevChar = glob[i - 1]; + let isMultiStar = false; + while (glob[i + 1] === '*') { + isMultiStar = true; + i += 1; + } + const nextChar = glob[i + 1]; + if (!options.globstar) { + // Treat any number of "*" as one + regexResultStr += '.*'; + } else { + const isGlobstarSegment = + isMultiStar && + ['/', undefined].includes(prevChar) && + ['/', undefined].includes(nextChar); + if (isGlobstarSegment) { + // Match zero or more path segments + regexResultStr += '((?:[^/]*(?:/|$))*)'; + // Move over the "/" + i += 1; + } else { + // Only match one path segment + regexResultStr += '([^/]*)'; + } + } + continue; // eslint-disable-line no-continue + } + regexResultStr += currentChar; } - regexResultStr += currentChar; - } - return new RegExp(`^${regexResultStr}$`); -} + return new RegExp(`^${regexResultStr}$`); + }, +); -/** - * @param {string} glob - */ -function getStartPath(glob) { - const reservedChars = ['?', '[', ']', '{', '}', ',', '.', '*']; - let hasFoundReservedChar = false; - return glob - .split('/') - .map(part => { - if (hasFoundReservedChar) return undefined; - hasFoundReservedChar = reservedChars.some(reservedChar => part.includes(reservedChar)); - return hasFoundReservedChar ? undefined : part; - }) - .filter(Boolean) - .join('/'); -} +const getStartPath = memoize( + /** + * @param {string} glob + */ + glob => { + const reservedChars = ['?', '[', ']', '{', '}', ',', '.', '*']; + let hasFoundReservedChar = false; + return glob + .split('/') + .map(part => { + if (hasFoundReservedChar) return undefined; + hasFoundReservedChar = reservedChars.some(reservedChar => part.includes(reservedChar)); + return hasFoundReservedChar ? undefined : part; + }) + .filter(Boolean) + .join('/'); + }, +); let isCacheEnabled = false; -/** @type {{[path:string]:nodeFs.Dirent[]}} */ +/** @type {{[path:string]:DirentWithPath[]}} */ const cache = {}; -/** - * @param {string} startPath - * @param {{fs?:FsLike, dirents?:nodeFs.Dirent[]}} providedOptions - * @returns {Promise} - */ -async function getAllFilesFromStartPath( - startPath, - { fs = /** @type {* & FsLike} */ (nodeFs), dirents = [] } = {}, -) { - if (isCacheEnabled && cache[startPath]) return cache[startPath]; +const getAllDirentsFromStartPath = memoize( + /** + * @param {string} startPath + * @param {{fs?:FsLike, dirents?:DirentWithPath[]}} providedOptions + * @returns {Promise} + */ + async (startPath, { fs = /** @type {* & FsLike} */ (nodeFs), dirents = [] } = {}) => { + if (isCacheEnabled && cache[startPath]) return cache[startPath]; - // Older node doesn't support recursive option - if (nodeMajor < 18) { - /** @type {nodeFs.Dirent[]} */ - const direntsForLvl = await fs.promises.readdir(startPath, { withFileTypes: true }); - for (const dirent of direntsForLvl) { - // @ts-expect-error - dirent.parentPath = dirent.path = startPath; // eslint-disable-line no-multi-assign - dirents.push(dirent); + // Older node doesn't support recursive option + if (nodeMajor < 18) { + /** @type {nodeFs.Dirent[]} */ + const direntsForLvl = await fs.promises.readdir(startPath, { withFileTypes: true }); + for (const _dirent of direntsForLvl) { + const dirent = /** @type {DirentWithPath} */ (_dirent); + dirent.parentPath = dirent.path = startPath; // eslint-disable-line no-multi-assign + dirents.push(/** @type {DirentWithPath} */ (dirent)); - if (dirent.isDirectory()) { - const subDir = path.join(startPath, dirent.name); - await getAllFilesFromStartPath(subDir, { fs, dirents }); + if (dirent.isDirectory()) { + const subDir = path.join(startPath, dirent.name); + await getAllDirentsFromStartPath(subDir, { fs, dirents }); + } } + return dirents; } - return /** @type {nodeFs.Dirent[]} */ (dirents); - } - // @ts-expect-error - dirents.push(...(await fs.promises.readdir(startPath, { withFileTypes: true, recursive: true }))); - cache[startPath] = dirents; - return dirents; -} + dirents.push( + // @ts-expect-error + ...(await fs.promises.readdir(startPath, { withFileTypes: true, recursive: true })), + ); + cache[startPath] = dirents; + return dirents; + }, +); + +const getAllDirentsRelativeToCwd = memoize( + /** + * @param {string} fullStartPath + * @param {{fs?:FsLike, cwd:string}} options + * @returns {Promise<{relativeToCwdPath:string;dirent:DirentWithPath}[]>} + */ + async (fullStartPath, options) => { + const allDirentsRelativeToStartPath = await getAllDirentsFromStartPath(fullStartPath, { + fs: options.fs, + }); + + const allDirEntsRelativeToCwd = allDirentsRelativeToStartPath.map(dirent => ({ + relativeToCwdPath: toPosixPath( + path.join(dirent.parentPath || dirent.path, dirent.name), + ).replace(`${toPosixPath(options.cwd)}/`, ''), + dirent, + })); + return allDirEntsRelativeToCwd; + }, +); /** * Lightweight glob implementation. @@ -219,18 +249,11 @@ export async function optimisedGlob(globOrGlobs, providedOptions = {}) { const fullStartPath = path.join(options.cwd, startPath); try { - const allDirentsRelativeToStartPath = await getAllFilesFromStartPath(fullStartPath, { + const allDirEntsRelativeToCwd = await getAllDirentsRelativeToCwd(fullStartPath, { + cwd: options.cwd, fs: options.fs, }); - const allDirEntsRelativeToCwd = allDirentsRelativeToStartPath.map(dirent => ({ - relativeToCwdPath: toPosixPath( - // @ts-expect-error - path.join(dirent.parentPath || dirent.path, dirent.name), - ).replace(`${toPosixPath(options.cwd)}/`, ''), - dirent, - })); - globEntries.push(...allDirEntsRelativeToCwd); } catch (e) { if (!options.suppressErrors) { @@ -262,8 +285,8 @@ export async function optimisedGlob(globOrGlobs, providedOptions = {}) { filteredPaths = filteredPaths.map(f => toPosixPath(path.join(options.cwd, f))); if (process.platform === 'win32') { - const driveLetter = path.win32.resolve(options.cwd).slice(0, 1).toUpperCase(); - filteredPaths = filteredPaths.map(f => `${driveLetter}:${f}`); + const driveChar = path.win32.resolve(options.cwd).slice(0, 1).toUpperCase(); + filteredPaths = filteredPaths.map(f => `${driveChar}:${f}`); } } @@ -273,10 +296,16 @@ export async function optimisedGlob(globOrGlobs, providedOptions = {}) { const result = options.unique ? Array.from(new Set(filteredPaths)) : filteredPaths; - return result.sort((a, b) => { + const res = result.sort((a, b) => { const pathDiff = a.split('/').length - b.split('/').length; return pathDiff !== 0 ? pathDiff : a.localeCompare(b); }); + + // It could happen the fs changes with the next call, so we clear the cache + getAllDirentsRelativeToCwd.clearCache(); + getAllDirentsFromStartPath.clearCache(); + + return res; } optimisedGlob.disableCache = () => {