Merge pull request #2292 from ing-bank/fix/providence-glob-perf

Fix/providence glob perf
This commit is contained in:
Thijs Louisse 2024-05-21 16:07:46 +02:00 committed by GitHub
commit def3df4c08
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 334 additions and 194 deletions

View file

@ -0,0 +1,8 @@
---
'providence-analytics': patch
---
- expose types; improve logging and formatting
- apply memoisation in optimised-glob
- add perf logs to LogService
- allow to clear cache of memoized function

View file

@ -24,7 +24,8 @@
},
"files": [
"inlined-swc-to-babel",
"src"
"src",
"types"
],
"scripts": {
"dashboard": "node ./src/dashboard/server.js --run-server --serve-from-package-root",

View file

@ -1,10 +1,11 @@
/* eslint-disable no-shadow, no-param-reassign */
import path from 'path';
import t from '@babel/types';
import babelTraverse from '@babel/traverse';
import { Analyzer } from '../core/Analyzer.js';
import t from '@babel/types';
import { trackDownIdentifierFromScope } from '../utils/track-down-identifier--legacy.js';
import { Analyzer } from '../core/Analyzer.js';
/**
* @typedef {import('@babel/types').File} File

View file

@ -1,5 +1,5 @@
/* eslint-disable no-continue */
import pathLib from 'path';
import path from 'path';
/* eslint-disable no-shadow, no-param-reassign */
import FindImportsAnalyzer from './find-imports.js';
import FindExportsAnalyzer from './find-exports.js';
@ -9,14 +9,15 @@ import { transformIntoIterableFindExportsOutput } from './helpers/transform-into
import { transformIntoIterableFindImportsOutput } from './helpers/transform-into-iterable-find-imports-output.js';
/**
* @typedef {import('../../../types/index.js').FindImportsAnalyzerResult} FindImportsAnalyzerResult
* @typedef {import('../../../types/index.js').FindExportsAnalyzerResult} FindExportsAnalyzerResult
* @typedef {import('../../../types/index.js').ConciseMatchImportsAnalyzerResult} ConciseMatchImportsAnalyzerResult
* @typedef {import('../../../types/index.js').IterableFindExportsAnalyzerEntry} IterableFindExportsAnalyzerEntry
* @typedef {import('../../../types/index.js').IterableFindImportsAnalyzerEntry} IterableFindImportsAnalyzerEntry
* @typedef {import('../../../types/index.js').ConciseMatchImportsAnalyzerResult} ConciseMatchImportsAnalyzerResult
* @typedef {import('../../../types/index.js').MatchImportsConfig} MatchImportsConfig
* @typedef {import('../../../types/index.js').MatchImportsAnalyzerResult} MatchImportsAnalyzerResult
* @typedef {import('../../../types/index.js').PathRelativeFromProjectRoot} PathRelativeFromProjectRoot
* @typedef {import('../../../types/index.js').MatchImportsAnalyzerResult} MatchImportsAnalyzerResult
* @typedef {import('../../../types/index.js').FindImportsAnalyzerResult} FindImportsAnalyzerResult
* @typedef {import('../../../types/index.js').FindExportsAnalyzerResult} FindExportsAnalyzerResult
* @typedef {import('../../../types/index.js').AnalyzerQueryResult} AnalyzerQueryResult
* @typedef {import('../../../types/index.js').MatchImportsConfig} MatchImportsConfig
* @typedef {import('../../../types/index.js').PathFromSystemRoot} PathFromSystemRoot
* @typedef {import('../../../types/index.js').AnalyzerName} AnalyzerName
* @typedef {import('../../../types/index.js').AnalyzerAst} AnalyzerAst
@ -117,7 +118,7 @@ async function matchImportsPostprocess(exportsAnalyzerResult, importsAnalyzerRes
const fromImportToExport = await fromImportToExportPerspective({
importee: importEntry.normalizedSource,
importer: /** @type {PathFromSystemRoot} */ (
pathLib.resolve(importProjectPath, importEntry.file)
path.resolve(importProjectPath, importEntry.file)
),
importeeProjectPath: cfg.referenceProjectPath,
});
@ -193,6 +194,7 @@ export default class MatchImportsAnalyzer extends Analyzer {
* Prepare
*/
const cachedAnalyzerResult = await this._prepare(cfg);
if (cachedAnalyzerResult) {
return cachedAnalyzerResult;
}
@ -205,6 +207,7 @@ export default class MatchImportsAnalyzer extends Analyzer {
targetProjectPath: cfg.referenceProjectPath,
skipCheckMatchCompatibility: cfg.skipCheckMatchCompatibility,
suppressNonCriticalLogs: true,
gatherFilesConfig: cfg.gatherFilesConfigReference,
});
}
@ -216,6 +219,7 @@ export default class MatchImportsAnalyzer extends Analyzer {
targetProjectPath: cfg.targetProjectPath,
skipCheckMatchCompatibility: cfg.skipCheckMatchCompatibility,
suppressNonCriticalLogs: true,
gatherFilesConfig: cfg.gatherFilesConfig,
});
}

View file

@ -1,27 +1,39 @@
/* eslint-disable no-param-reassign */
import semver from 'semver';
import path from 'path';
import { LogService } from './LogService.js';
import { QueryService } from './QueryService.js';
import { ReportService } from './ReportService.js';
import semver from 'semver';
import { getFilePathRelativeFromRoot } from '../utils/get-file-path-relative-from-root.js';
import { InputDataService } from './InputDataService.js';
import { toPosixPath } from '../utils/to-posix-path.js';
import { getFilePathRelativeFromRoot } from '../utils/get-file-path-relative-from-root.js';
import { ReportService } from './ReportService.js';
import { QueryService } from './QueryService.js';
import { LogService } from './LogService.js';
/**
* @typedef {import("@swc/core").Module} SwcAstModule
* @typedef {import('../../../types/index.js').AnalyzerName} AnalyzerName
* @typedef {import('../../../types/index.js').AnalyzerAst} AnalyzerAst
* @typedef {import('../../../types/index.js').PathFromSystemRoot} PathFromSystemRoot
* @typedef {import('../../../types/index.js').QueryOutput} QueryOutput
* @typedef {import('../../../types/index.js').ProjectInputData} ProjectInputData
* @typedef {(ast: File, astContext: {code:string; relativePath:string; projectData: ProjectInputDataWithMeta}) => object} FileAstTraverseFn
* @typedef {import('../../../types/index.js').ProjectInputDataWithMeta} ProjectInputDataWithMeta
* @typedef {import('../../../types/index.js').AnalyzerQueryResult} AnalyzerQueryResult
* @typedef {import('../../../types/index.js').MatchAnalyzerConfig} MatchAnalyzerConfig
* @typedef {import('../../../types/index.js').PathFromSystemRoot} PathFromSystemRoot
* @typedef {import('../../../types/index.js').ProjectInputData} ProjectInputData
* @typedef {import('../../../types/index.js').AnalyzerName} AnalyzerName
* @typedef {import('../../../types/index.js').AnalyzerAst} AnalyzerAst
* @typedef {import('../../../types/index.js').QueryOutput} QueryOutput
* @typedef {import("@swc/core").Module} SwcAstModule
* @typedef {import('@babel/types').File} File
* @typedef {(ast: File, astContext: {code:string; relativePath:string; projectData: ProjectInputDataWithMeta}) => object} FileAstTraverseFn
*/
/**
* @param {string} identifier
*/
function displayProjectsInLog(identifier) {
const [target, targetV, , reference, referenceV] = identifier.split('_');
return decodeURIComponent(
`${target}@${targetV} ${reference ? `- ${reference}@${referenceV}` : ''}`,
);
}
/**
* Analyzes one entry: the callback can traverse a given ast for each entry
* @param {ProjectInputDataWithMeta} projectData
@ -247,12 +259,7 @@ export class Analyzer {
if (!compatible) {
if (!cfg.suppressNonCriticalLogs) {
LogService.info(
`skipping ${LogService.pad(this.name, 16)} for ${
this.identifier
}: (${reason})\n${cfg.targetProjectPath.replace(
`${process.cwd()}/providence-input-data/search-targets/`,
'',
)}`,
`${LogService.pad(`skipping ${this.name} (${reason})`)}${displayProjectsInLog(this.identifier)}`,
);
}
return ensureAnalyzerResultFormat(`[${reason}]`, cfg, this);
@ -273,24 +280,43 @@ export class Analyzer {
}
if (!cfg.suppressNonCriticalLogs) {
LogService.info(`starting ${LogService.pad(this.name, 16)} for ${this.identifier}`);
LogService.info(
`${LogService.pad(`starting ${this.name}`)}${displayProjectsInLog(this.identifier)}`,
);
}
/**
* Get reference and search-target data
*/
if (!cfg.targetProjectResult) {
performance.mark('analyzer--prepare--createDTarg-start');
this.targetData = await InputDataService.createDataObject(
[cfg.targetProjectPath],
cfg.gatherFilesConfig,
);
performance.mark('analyzer--prepare--createDTarg-end');
const m1 = performance.measure(
'analyzer--prepare--createDTarg',
'analyzer--prepare--createDTarg-start',
'analyzer--prepare--createDTarg-end',
);
LogService.perf(m1);
}
if (cfg.referenceProjectPath) {
performance.mark('analyzer--prepare--createDRef-start');
this.referenceData = await InputDataService.createDataObject(
[cfg.referenceProjectPath],
cfg.gatherFilesConfigReference || cfg.gatherFilesConfig,
);
performance.mark('analyzer--prepare--createDRef-end');
const m2 = performance.measure(
'analyzer--prepare--createDRef',
'analyzer--prepare--createDRef-start',
'analyzer--prepare--createDRef-end',
);
LogService.perf(m2);
}
return undefined;
@ -304,10 +330,21 @@ export class Analyzer {
_finalize(queryOutput, cfg) {
LogService.debug(`Analyzer "${this.name}": started _finalize method`);
performance.mark('analyzer--finalize-start');
const analyzerResult = ensureAnalyzerResultFormat(queryOutput, cfg, this);
if (!cfg.suppressNonCriticalLogs) {
LogService.success(`finished ${LogService.pad(this.name, 16)} for ${this.identifier}`);
LogService.success(
`${LogService.pad(`finished ${this.name}`)}${displayProjectsInLog(this.identifier)}`,
);
}
performance.mark('analyzer--finalize-end');
const measurementFinalize = performance.measure(
'analyzer--finalize',
'analyzer--finalize-start',
'analyzer--finalize-end',
);
LogService.perf(measurementFinalize);
return analyzerResult;
}

View file

@ -97,11 +97,25 @@ export class LogService {
log(colors.fgBlue, ` info${printTitle(title)}`, colors.reset, text);
}
/**
* @param {PerformanceMeasure} measurement
* @param {string} [title]
*/
static perf(measurement, title) {
const text = `${this.pad(`[${measurement.name}]`)} ${measurement.duration}ms`;
// @ts-ignore
this._logHistory.push(`- perf -${printTitle(title)} ${text}`);
if (this.allMuted || !this.perfEnabled) {
return;
}
log(colors.fgGray, ` perf${printTitle(title)}`, colors.reset, text);
}
/**
* @param {string} text
* @param {number} minChars
*/
static pad(text, minChars = 20) {
static pad(text, minChars = 40) {
let result = text;
const padding = minChars - text.length;
if (padding > 0) {
@ -127,6 +141,7 @@ export class LogService {
LogService.debugEnabled = false;
LogService.allMuted = false;
LogService.throwsOnError = false;
LogService.perfEnabled = false;
/** @type {string[]} */
LogService._logHistory = [];

View file

@ -9,13 +9,13 @@ import { LogService } from './core/LogService.js';
import { AstService } from './core/AstService.js';
/**
* @typedef {import('../../types/index.js').ProvidenceConfig} ProvidenceConfig
* @typedef {import('../../types/index.js').PathFromSystemRoot} PathFromSystemRoot
* @typedef {import('../../types/index.js').QueryResult} QueryResult
* @typedef {import('../../types/index.js').AnalyzerQueryResult} AnalyzerQueryResult
* @typedef {import('../../types/index.js').QueryConfig} QueryConfig
* @typedef {import('../../types/index.js').AnalyzerQueryConfig} AnalyzerQueryConfig
* @typedef {import('../../types/index.js').PathFromSystemRoot} PathFromSystemRoot
* @typedef {import('../../types/index.js').GatherFilesConfig} GatherFilesConfig
* @typedef {import('../../types/index.js').ProvidenceConfig} ProvidenceConfig
* @typedef {import('../../types/index.js').QueryResult} QueryResult
* @typedef {import('../../types/index.js').QueryConfig} QueryConfig
*/
/**
@ -81,6 +81,8 @@ function getSlicedQueryConfig(queryConfig, targetProjectPath, referenceProjectPa
* @param {{ gatherFilesConfig:GatherFilesConfig, gatherFilesConfigReference:GatherFilesConfig, skipCheckMatchCompatibility:boolean }} cfg
*/
async function handleAnalyzerForProjectCombo(slicedQConfig, cfg) {
performance.mark(`${slicedQConfig.analyzerName}-start`);
const queryResult = await QueryService.astSearch(slicedQConfig, {
gatherFilesConfig: cfg.gatherFilesConfig,
gatherFilesConfigReference: cfg.gatherFilesConfigReference,
@ -88,6 +90,17 @@ async function handleAnalyzerForProjectCombo(slicedQConfig, cfg) {
addSystemPathsInResult: cfg.addSystemPathsInResult,
...slicedQConfig.analyzerConfig,
});
performance.mark(`${slicedQConfig.analyzerName}-end`);
const measurement = /** @type {* & PerformanceMeasure} */ (
performance.measure(
slicedQConfig.analyzerName,
`${slicedQConfig.analyzerName}-start`,
`${slicedQConfig.analyzerName}-end`,
)
);
LogService.perf(measurement);
if (queryResult) {
report(queryResult, cfg);
}

View file

@ -24,10 +24,9 @@ function createCachableArg(arg) {
/**
* @template T
* @type {<T extends Function>(functionToMemoize:T, opts?:{ storage?:object; }) => T}
* @type {<T extends Function>(functionToMemoize:T, opts?:{ cacheStorage?:object; }) => T & {clearCache:() => void}}
*/
export function memoize(functionToMemoize, { storage = {} } = {}) {
return /** @type {* & T} */ (
export function memoize(functionToMemoize, { cacheStorage = {} } = {}) {
function memoizedFn() {
// eslint-disable-next-line prefer-rest-params
const args = [...arguments];
@ -36,18 +35,22 @@ export function memoize(functionToMemoize, { storage = {} } = {}) {
const cachableArgs = shouldSerialize ? args.map(createCachableArg) : args;
// Allow disabling of cache for testing purposes
// @ts-expect-error
if (shouldCache && cachableArgs in storage) {
if (shouldCache && cachableArgs in cacheStorage) {
// @ts-expect-error
return storage[cachableArgs];
return cacheStorage[cachableArgs];
}
// @ts-expect-error
const outcome = functionToMemoize.apply(this, args);
// @ts-expect-error
// eslint-disable-next-line no-param-reassign
storage[cachableArgs] = outcome;
cacheStorage[cachableArgs] = outcome;
return outcome;
}
);
memoizedFn.clearCache = () => {
// eslint-disable-next-line no-param-reassign
cacheStorage = {};
};
return /** @type {* & T & {clearCache:() => void}} */ (memoizedFn);
}
/**

View file

@ -4,22 +4,25 @@ import nodeFs from 'fs';
import path from 'path';
import { toPosixPath } from './to-posix-path.js';
import { memoize } from './memoize.js';
/**
* @typedef {nodeFs} FsLike
* @typedef {nodeFs.Dirent & {path:string;parentPath:string}} DirentWithPath
* @typedef {{onlyDirectories:boolean;onlyFiles:boolean;deep:number;suppressErrors:boolean;fs: FsLike;cwd:string;absolute:boolean;extglob:boolean;}} FastGlobtions
*/
const [nodeMajor] = process.versions.node.split('.').map(Number);
/**
export const parseGlobToRegex = memoize(
/**
* @param {string} glob
* @param {object} [providedOpts]
* @param {boolean} [providedOpts.globstar=true] if true, '/foo/*' => '^\/foo\/[^/]*$' (not allowing folders inside *), else '/foo/*' => '^\/foo\/.*$'
* @param {boolean} [providedOpts.extglob=true] if true, supports so called "extended" globs (like bash) and single character matching, matching ranges of characters, group matching etc.
* @returns {RegExp}
*/
export function parseGlobToRegex(glob, providedOpts) {
(glob, providedOpts) => {
if (typeof glob !== 'string') throw new TypeError('Expected a string');
const options = {
@ -84,7 +87,9 @@ export function parseGlobToRegex(glob, providedOpts) {
regexResultStr += '.*';
} else {
const isGlobstarSegment =
isMultiStar && ['/', undefined].includes(prevChar) && ['/', undefined].includes(nextChar);
isMultiStar &&
['/', undefined].includes(prevChar) &&
['/', undefined].includes(nextChar);
if (isGlobstarSegment) {
// Match zero or more path segments
regexResultStr += '((?:[^/]*(?:/|$))*)';
@ -101,12 +106,14 @@ export function parseGlobToRegex(glob, providedOpts) {
}
return new RegExp(`^${regexResultStr}$`);
}
},
);
/**
const getStartPath = memoize(
/**
* @param {string} glob
*/
function getStartPath(glob) {
glob => {
const reservedChars = ['?', '[', ']', '{', '}', ',', '.', '*'];
let hasFoundReservedChar = false;
return glob
@ -118,45 +125,68 @@ function getStartPath(glob) {
})
.filter(Boolean)
.join('/');
}
},
);
let isCacheEnabled = false;
/** @type {{[path:string]:nodeFs.Dirent[]}} */
/** @type {{[path:string]:DirentWithPath[]}} */
const cache = {};
/**
const getAllDirentsFromStartPath = memoize(
/**
* @param {string} startPath
* @param {{fs?:FsLike, dirents?:nodeFs.Dirent[]}} providedOptions
* @returns {Promise<nodeFs.Dirent[]>}
* @param {{fs?:FsLike, dirents?:DirentWithPath[]}} providedOptions
* @returns {Promise<DirentWithPath[]>}
*/
async function getAllFilesFromStartPath(
startPath,
{ fs = /** @type {* & FsLike} */ (nodeFs), dirents = [] } = {},
) {
async (startPath, { fs = /** @type {* & FsLike} */ (nodeFs), dirents = [] } = {}) => {
if (isCacheEnabled && cache[startPath]) return cache[startPath];
// Older node doesn't support recursive option
if (nodeMajor < 18) {
/** @type {nodeFs.Dirent[]} */
const direntsForLvl = await fs.promises.readdir(startPath, { withFileTypes: true });
for (const dirent of direntsForLvl) {
// @ts-expect-error
for (const _dirent of direntsForLvl) {
const dirent = /** @type {DirentWithPath} */ (_dirent);
dirent.parentPath = dirent.path = startPath; // eslint-disable-line no-multi-assign
dirents.push(dirent);
dirents.push(/** @type {DirentWithPath} */ (dirent));
if (dirent.isDirectory()) {
const subDir = path.join(startPath, dirent.name);
await getAllFilesFromStartPath(subDir, { fs, dirents });
await getAllDirentsFromStartPath(subDir, { fs, dirents });
}
}
return /** @type {nodeFs.Dirent[]} */ (dirents);
return dirents;
}
dirents.push(
// @ts-expect-error
dirents.push(...(await fs.promises.readdir(startPath, { withFileTypes: true, recursive: true })));
...(await fs.promises.readdir(startPath, { withFileTypes: true, recursive: true })),
);
cache[startPath] = dirents;
return dirents;
}
},
);
const getAllDirentsRelativeToCwd = memoize(
/**
* @param {string} fullStartPath
* @param {{fs?:FsLike, cwd:string}} options
* @returns {Promise<{relativeToCwdPath:string;dirent:DirentWithPath}[]>}
*/
async (fullStartPath, options) => {
const allDirentsRelativeToStartPath = await getAllDirentsFromStartPath(fullStartPath, {
fs: options.fs,
});
const allDirEntsRelativeToCwd = allDirentsRelativeToStartPath.map(dirent => ({
relativeToCwdPath: toPosixPath(
path.join(dirent.parentPath || dirent.path, dirent.name),
).replace(`${toPosixPath(options.cwd)}/`, ''),
dirent,
}));
return allDirEntsRelativeToCwd;
},
);
/**
* Lightweight glob implementation.
@ -219,18 +249,11 @@ export async function optimisedGlob(globOrGlobs, providedOptions = {}) {
const fullStartPath = path.join(options.cwd, startPath);
try {
const allDirentsRelativeToStartPath = await getAllFilesFromStartPath(fullStartPath, {
const allDirEntsRelativeToCwd = await getAllDirentsRelativeToCwd(fullStartPath, {
cwd: options.cwd,
fs: options.fs,
});
const allDirEntsRelativeToCwd = allDirentsRelativeToStartPath.map(dirent => ({
relativeToCwdPath: toPosixPath(
// @ts-expect-error
path.join(dirent.parentPath || dirent.path, dirent.name),
).replace(`${toPosixPath(options.cwd)}/`, ''),
dirent,
}));
globEntries.push(...allDirEntsRelativeToCwd);
} catch (e) {
if (!options.suppressErrors) {
@ -262,8 +285,8 @@ export async function optimisedGlob(globOrGlobs, providedOptions = {}) {
filteredPaths = filteredPaths.map(f => toPosixPath(path.join(options.cwd, f)));
if (process.platform === 'win32') {
const driveLetter = path.win32.resolve(options.cwd).slice(0, 1).toUpperCase();
filteredPaths = filteredPaths.map(f => `${driveLetter}:${f}`);
const driveChar = path.win32.resolve(options.cwd).slice(0, 1).toUpperCase();
filteredPaths = filteredPaths.map(f => `${driveChar}:${f}`);
}
}
@ -273,10 +296,16 @@ export async function optimisedGlob(globOrGlobs, providedOptions = {}) {
const result = options.unique ? Array.from(new Set(filteredPaths)) : filteredPaths;
return result.sort((a, b) => {
const res = result.sort((a, b) => {
const pathDiff = a.split('/').length - b.split('/').length;
return pathDiff !== 0 ? pathDiff : a.localeCompare(b);
});
// It could happen the fs changes with the next call, so we clear the cache
getAllDirentsRelativeToCwd.clearCache();
getAllDirentsFromStartPath.clearCache();
return res;
}
optimisedGlob.disableCache = () => {

View file

@ -201,13 +201,13 @@ describe('Memoize', () => {
sumCalled += 1;
return { ...a, ...b };
}
const sumMemoized = memoize(sum, { serializeObjects: true });
const sumMemoized = memoize(sum);
let sum2Called = 0;
function sum2(/** @type {object} a */ a, /** @type {object} a */ b) {
sum2Called += 1;
return { ...a, ...b };
}
const sum2Memoized = memoize(sum2, { serializeObjects: true });
const sum2Memoized = memoize(sum2);
expect(sumMemoized({ x: 1 }, { y: 2 })).to.deep.equal({ x: 1, y: 2 });
expect(sumCalled).to.equal(1);
@ -233,7 +233,7 @@ describe('Memoize', () => {
sumCalled += 1;
return { ...a, ...b };
}
const sumMemoized = memoize(sum, { serializeObjects: true });
const sumMemoized = memoize(sum);
// Put in cache for args combination
const result = sumMemoized({ x: 1 }, { y: 2 });
@ -313,4 +313,33 @@ describe('Memoize', () => {
expect(sum2Called).to.equal(1);
});
});
describe('Cache', () => {
it(`"memoizedFn.clearCache()" clears the cache for a memoized fn"`, async () => {
let sumCalled = 0;
function sum(/** @type {string} a */ a, /** @type {string} a */ b) {
sumCalled += 1;
return a + b;
}
const sumMemoized = memoize(sum);
// Put in cache for args combination
expect(sumMemoized('1', '2')).to.equal('12');
expect(sumCalled).to.equal(1);
// Return from cache
expect(sumMemoized('1', '2')).to.equal('12');
expect(sumCalled).to.equal(1);
sumMemoized.clearCache();
// Now the original function is called again
expect(sumMemoized('1', '2')).to.equal('12');
expect(sumCalled).to.equal(2);
// Return from new cache again
expect(sumMemoized('1', '2')).to.equal('12');
expect(sumCalled).to.equal(2);
});
});
});