Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: memory usage on giant repos #591

Merged
merged 10 commits into from
May 29, 2024
Merged
906 changes: 242 additions & 664 deletions CHANGELOG.md

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@
"dependencies": {
"@oclif/core": "^3.26.6",
"@salesforce/core": "^7.3.9",
"@salesforce/kit": "^3.1.1",
"@salesforce/source-deploy-retrieve": "^11.6.2",
"@salesforce/kit": "^3.1.2",
"@salesforce/source-deploy-retrieve": "^11.6.3",
"@salesforce/ts-types": "^2.0.9",
"fast-xml-parser": "^4.3.6",
"graceful-fs": "^4.2.11",
Expand Down
18 changes: 18 additions & 0 deletions src/shared/local/functions.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/*
* Copyright (c) 2023, salesforce.com, inc.
* All rights reserved.
* Licensed under the BSD 3-Clause license.
* For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
*/
import path from 'node:path';
import { WORKDIR, HEAD } from './localShadowRepo';
import { FILE } from './localShadowRepo';

import { StatusRow } from './types';

// filenames were normalized when read from isogit

/** pull just the file paths out of a set of status rows */
export const toFilenames = (rows: StatusRow[]): string[] => rows.map((statusRow) => statusRow[FILE]);
/** a status row is a deletion when the file is absent (0) from the workdir */
export const isDeleted = (statusRow: StatusRow): boolean => statusRow[WORKDIR] === 0;
/** a status row is an addition when the file is absent (0) from HEAD but present (2) in the workdir */
export const isAdded = (statusRow: StatusRow): boolean => statusRow[HEAD] === 0 && statusRow[WORKDIR] === 2;
export const ensureWindows = (filepath: string): string => path.win32.normalize(filepath);
273 changes: 49 additions & 224 deletions src/shared/localShadowRepo.ts → src/shared/local/localShadowRepo.ts

Large diffs are not rendered by default.

220 changes: 220 additions & 0 deletions src/shared/local/moveDetection.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
/*
* Copyright (c) 2023, salesforce.com, inc.
* All rights reserved.
* Licensed under the BSD 3-Clause license.
* For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
*/
import path from 'node:path';
import { Logger, Lifecycle } from '@salesforce/core';
import {
MetadataResolver,
SourceComponent,
RegistryAccess,
VirtualTreeContainer,
} from '@salesforce/source-deploy-retrieve';
// @ts-expect-error isogit has both ESM and CJS exports but node16 module/resolution identifies it as ESM
import git from 'isomorphic-git';
import * as fs from 'graceful-fs';
import { Performance } from '@oclif/core';
import { sourceComponentGuard } from '../guards';
import { isDeleted, isAdded, ensureWindows, toFilenames } from './functions';
import { AddAndDeleteMaps, FilenameBasenameHash, StatusRow, StringMap } from './types';

type AddAndDeleteFileInfos = { addedInfo: FilenameBasenameHash[]; deletedInfo: FilenameBasenameHash[] };
type AddedAndDeletedFilenames = { added: Set<string>; deleted: Set<string> };

/** composed functions to simplified use by the shadowRepo class */
export const filenameMatchesToMap =
(isWindows: boolean) =>
(registry: RegistryAccess) =>
(projectPath: string) =>
(gitDir: string) =>
async ({ added, deleted }: AddedAndDeletedFilenames): Promise<StringMap> =>
removeNonMatches(isWindows)(registry)(
compareHashes(
await buildMaps(
await toFileInfo({
projectPath,
gitDir,
added,
deleted,
})
)
)
);

/** compare delete and adds from git.status, matching basenames of the files. returns early when there's nothing to match */
export const getMatches = (status: StatusRow[]): AddedAndDeletedFilenames => {
// We check for moved files in incremental steps and exit as early as we can to avoid any performance degradation
// Deleted files will be more rare than added files, so we'll check them first and exit early if there are none
const emptyResult = { added: new Set<string>(), deleted: new Set<string>() };
const deletedFiles = status.filter(isDeleted);
if (!deletedFiles.length) return emptyResult;

const addedFiles = status.filter(isAdded);
if (!addedFiles.length) return emptyResult;

// Both arrays have contents, look for matching basenames
const addedFilenames = toFilenames(addedFiles);
const deletedFilenames = toFilenames(deletedFiles);

// Build Sets of basenames for added and deleted files for quick lookups
const addedBasenames = new Set(addedFilenames.map((filename) => path.basename(filename)));
const deletedBasenames = new Set(deletedFilenames.map((filename) => path.basename(filename)));

// TODO: when node 22 is everywhere, we can use Set.prototype.intersection
// Again, we filter over the deleted files first and exit early if there are no filename matches
const deletedFilenamesWithMatches = new Set(deletedFilenames.filter((f) => addedBasenames.has(path.basename(f))));
if (!deletedFilenamesWithMatches.size) return emptyResult;

const addedFilenamesWithMatches = new Set(addedFilenames.filter((f) => deletedBasenames.has(path.basename(f))));
if (!addedFilenamesWithMatches.size) return emptyResult;

return { added: addedFilenamesWithMatches, deleted: deletedFilenamesWithMatches };
};

/** build maps of the add/deletes with filenames, returning the matches Logs if non-matches */
const buildMaps = async ({ addedInfo, deletedInfo }: AddAndDeleteFileInfos): Promise<AddAndDeleteMaps> => {
const [addedMap, addedIgnoredMap] = buildMap(addedInfo);
const [deletedMap, deletedIgnoredMap] = buildMap(deletedInfo);

// If we detected any files that have the same basename and hash, emit a warning and send telemetry
// These files will still show up as expected in the `sf project deploy preview` output
// We could add more logic to determine and display filepaths that we ignored...
// but this is likely rare enough to not warrant the added complexity
// Telemetry will help us determine how often this occurs
if (addedIgnoredMap.size || deletedIgnoredMap.size) {
const message = 'Files were found that have the same basename and hash. Skipping the commit of these files';
const logger = Logger.childFromRoot('ShadowRepo.compareHashes');
logger.warn(message);
const lifecycle = Lifecycle.getInstance();
await Promise.all([
lifecycle.emitWarning(message),
lifecycle.emitTelemetry({ eventName: 'moveFileHashBasenameCollisionsDetected' }),
]);
}
return { addedMap, deletedMap };
};

/** pair the added/deleted filenames that share a hash#basename key (added filename => deleted filename) */
const compareHashes = ({ addedMap, deletedMap }: AddAndDeleteMaps): StringMap =>
  new Map(
    [...addedMap.entries()]
      .map(([key, addedFilename]): [string, string | undefined] => [addedFilename, deletedMap.get(key)])
      .filter((pair): pair is [string, string] => Boolean(pair[1]))
  );

/** given a StringMap, resolve the metadata types and keep only entries that have matching type/parent */
const removeNonMatches =
  (isWindows: boolean) =>
  (registry: RegistryAccess) =>
  (matches: StringMap): StringMap => {
    if (!matches.size) return matches;
    // on windows, filenames must be converted back to windows separators before resolution
    const normalize = (filepath: string): string => (isWindows ? ensureWindows(filepath) : filepath);
    const addedFiles = [...matches.keys()].map(normalize);
    const deletedFiles = [...matches.values()].map(normalize);
    const resolverAdded = new MetadataResolver(registry, VirtualTreeContainer.fromFilePaths(addedFiles));
    const resolverDeleted = new MetadataResolver(registry, VirtualTreeContainer.fromFilePaths(deletedFiles));

    const matchingEntries = [...matches.entries()].filter(([addedFile, deletedFile]) => {
      // we're only ever using the first element of the arrays
      const [resolvedAdded] = resolveType(resolverAdded, [normalize(addedFile)]);
      const [resolvedDeleted] = resolveType(resolverDeleted, [normalize(deletedFile)]);
      return (
        // they could match, or could both be undefined (because unresolved by SDR)
        resolvedAdded?.type.name === resolvedDeleted?.type.name &&
        // parent names match, if resolved and there are parents
        resolvedAdded?.parent?.name === resolvedDeleted?.parent?.name &&
        // parent types match, if resolved and there are parents
        resolvedAdded?.parent?.type.name === resolvedDeleted?.parent?.type.name
      );
    });
    return new Map(matchingEntries);
  };

/** enrich the filenames with basename and oid (hash) */
const toFileInfo = async ({
  projectPath,
  gitDir,
  added,
  deleted,
}: {
  projectPath: string;
  gitDir: string;
  added: Set<string>;
  deleted: Set<string>;
}): Promise<AddAndDeleteFileInfos> => {
  // Track how long it takes to gather the oid information from the git trees
  const getInfoMarker = Performance.mark('@salesforce/source-tracking', 'localShadowRepo.detectMovedFiles#toFileInfo', {
    addedFiles: added.size,
    deletedFiles: deleted.size,
  });

  // deleted files are gone from the workdir, so their hashes come from the git objects at HEAD
  const headRef = await git.resolveRef({ fs, dir: projectPath, gitdir: gitDir, ref: 'HEAD' });

  // fix: the inner `await`s were redundant inside Promise.all (it accepts promises directly)
  const [addedInfo, deletedInfo] = await Promise.all([
    Promise.all(Array.from(added).map(getHashForAddedFile(projectPath))),
    Promise.all(Array.from(deleted).map(getHashFromActualFileContents(gitDir)(projectPath)(headRef))),
  ]);

  getInfoMarker?.stop();

  return { addedInfo, deletedInfo };
};

/**
 * Group file infos by `hash#basename` key.
 * Returns [matchable map, ignored map]: any key seen more than once is moved to the ignored map,
 * since duplicate hash#basename pairs make it impossible to tell where a file moved to or from.
 */
const buildMap = (info: FilenameBasenameHash[]): StringMap[] => {
  const map: StringMap = new Map();
  const ignore: StringMap = new Map();
  // fix: use forEach, not map — we only want the side effects, not a discarded result array
  info.forEach((i) => {
    const key = `${i.hash}#${i.basename}`;
    // If we find a duplicate key, we need to remove it and ignore it in the future.
    if (map.has(key) || ignore.has(key)) {
      map.delete(key);
      ignore.set(key, i.filename);
    } else {
      map.set(key, i.filename);
    }
  });
  return [map, ignore];
};

/** added files are not yet in git, so hash their working-tree contents to get an oid */
const getHashForAddedFile =
  (projectPath: string) =>
  async (filepath: string): Promise<FilenameBasenameHash> => {
    const contents = await fs.promises.readFile(path.join(projectPath, filepath));
    const { oid } = await git.hashBlob({ object: contents });
    return { filename: filepath, basename: path.basename(filepath), hash: oid };
  };

/** resolve each filename to its SourceComponent(s); files SDR cannot resolve are logged and dropped */
const resolveType = (resolver: MetadataResolver, filenames: string[]): SourceComponent[] =>
  filenames
    .flatMap((filename) => {
      try {
        return resolver.getComponentsFromPath(filename);
      } catch (e) {
        const logger = Logger.childFromRoot('ShadowRepo.compareTypes');
        // fix: message previously used `$(unknown)` — shell-style syntax instead of a template
        // interpolation, so the offending filename was never logged
        logger.warn(`unable to resolve ${filename}`);
        return undefined;
      }
    })
    .filter(sourceComponentGuard);

/**
 * deleted files no longer exist in the workdir, so read the blob's oid from the git object store
 * at the given commit (the caller passes the resolved HEAD ref as `oid`)
 */
const getHashFromActualFileContents =
  (gitdir: string) =>
  (projectPath: string) =>
  (oid: string) =>
  async (filepath: string): Promise<FilenameBasenameHash> => {
    const blobResult = await git.readBlob({ fs, dir: projectPath, gitdir, filepath, oid });
    return { filename: filepath, basename: path.basename(filepath), hash: blobResult.oid };
  };
11 changes: 11 additions & 0 deletions src/shared/local/types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/*
* Copyright (c) 2023, salesforce.com, inc.
* All rights reserved.
* Licensed under the BSD 3-Clause license.
* For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
*/
/** a file's full path, its basename, and the hash (git oid) of its contents */
export type FilenameBasenameHash = { filename: string; hash: string; basename: string };
/** generic string => string map (used for hash#basename => filename and added => deleted filename lookups) */
export type StringMap = Map<string, string>;
/** the matchable maps for added and deleted files */
export type AddAndDeleteMaps = { addedMap: StringMap; deletedMap: StringMap };

/** one row of isomorphic-git's statusMatrix output — https://isomorphic-git.org/docs/en/statusMatrix#docsNav */
export type StatusRow = [file: string, head: number, workdir: number, stage: number];
2 changes: 1 addition & 1 deletion src/sourceTracking.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import {
import { filePathsFromMetadataComponent } from '@salesforce/source-deploy-retrieve/lib/src/utils/filePathGenerator';
import { Performance } from '@oclif/core';
import { RemoteSourceTrackingService, remoteChangeElementToChangeResult } from './shared/remoteSourceTrackingService';
import { ShadowRepo } from './shared/localShadowRepo';
import { ShadowRepo } from './shared/local/localShadowRepo';
import { throwIfConflicts, findConflictsInComponentSet, getDedupedConflictsFromChanges } from './shared/conflicts';
import {
RemoteSyncInput,
Expand Down
2 changes: 1 addition & 1 deletion test/nuts/local/commitPerf.nut.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import path from 'node:path';
import { TestSession } from '@salesforce/cli-plugins-testkit';
import { expect } from 'chai';
import { RegistryAccess } from '@salesforce/source-deploy-retrieve';
import { ShadowRepo } from '../../../src/shared/localShadowRepo';
import { ShadowRepo } from '../../../src/shared/local/localShadowRepo';

describe('perf testing for big commits', () => {
let session: TestSession;
Expand Down
63 changes: 35 additions & 28 deletions test/nuts/local/localTrackingFileMovesDecomposedChild.nut.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,15 @@ import { TestSession } from '@salesforce/cli-plugins-testkit';
import { expect } from 'chai';
import * as fs from 'graceful-fs';
import { RegistryAccess } from '@salesforce/source-deploy-retrieve';
import { ShadowRepo } from '../../../src/shared/localShadowRepo';
import { ShadowRepo } from '../../../src/shared/local/localShadowRepo';

/* eslint-disable no-unused-expressions */

describe('ignores moved files that are children of a decomposed metadata type', () => {
const FIELD = path.join('fields', 'Account__c.field-meta.xml');
let session: TestSession;
let repo: ShadowRepo;
let filesToSync: string[];
let objectsDir: string;

before(async () => {
session = await TestSession.create({
Expand All @@ -26,12 +27,17 @@ describe('ignores moved files that are children of a decomposed metadata type',
},
devhubAuthStrategy: 'NONE',
});
objectsDir = path.join(session.project.dir, 'force-app', 'main', 'default', 'objects');
});

after(async () => {
await session?.clean();
});

afterEach(() => {
delete process.env.SF_BETA_TRACK_FILE_MOVES;
});

it('initialize the local tracking', async () => {
repo = await ShadowRepo.getInstance({
orgId: 'fakeOrgId',
Expand All @@ -44,32 +50,12 @@ describe('ignores moved files that are children of a decomposed metadata type',
it('should ignore moved child metadata', async () => {
expect(process.env.SF_BETA_TRACK_FILE_MOVES).to.be.undefined;
process.env.SF_BETA_TRACK_FILE_MOVES = 'true';
// Commit the existing class files
filesToSync = await repo.getChangedFilenames();
// Commit the existing files
const filesToSync = await repo.getChangedFilenames();
await repo.commitChanges({ deployedFiles: filesToSync });

// move all the classes to the new folder
const objectFieldOld = path.join(
session.project.dir,
'force-app',
'main',
'default',
'objects',
'Order__c',
'fields',
'Account__c.field-meta.xml'
);
const objectFieldNew = path.join(
session.project.dir,
'force-app',
'main',
'default',
'objects',
'Product__c',
'fields',
'Account__c.field-meta.xml'
);
// fs.mkdirSync(path.join(session.project.dir, 'force-app', 'main', 'foo'), { recursive: true });
// move the field from one object to another
const objectFieldOld = path.join(objectsDir, 'Order__c', FIELD);
const objectFieldNew = path.join(objectsDir, 'Product__c', FIELD);
fs.renameSync(objectFieldOld, objectFieldNew);

await repo.getStatus(true);
Expand All @@ -78,6 +64,27 @@ describe('ignores moved files that are children of a decomposed metadata type',
.to.be.an('array')
.with.lengthOf(2);

delete process.env.SF_BETA_TRACK_FILE_MOVES;
// put it back how it was and verify the tracking
fs.renameSync(objectFieldNew, objectFieldOld);
await repo.getStatus(true);

expect(await repo.getChangedFilenames())
.to.be.an('array')
.with.lengthOf(0);
});

it('should clear tracking when the field is moved to another dir', async () => {
  // move detection is behind this env var (deleted in afterEach), so it must be set here
  // for the moved file to be auto-committed and the change list to come back empty
  process.env.SF_BETA_TRACK_FILE_MOVES = 'true';
  const newDir = path.join(session.project.dir, 'force-app', 'other', 'objects', 'Order__c', 'fields');
  await fs.promises.mkdir(newDir, {
    recursive: true,
  });
  const objectFieldOld = path.join(objectsDir, 'Order__c', FIELD);
  // fix: the destination previously repeated the source path, making the rename a no-op
  // and leaving the newly created dir unused
  const objectFieldNew = path.join(newDir, path.basename(FIELD));
  fs.renameSync(objectFieldOld, objectFieldNew);
  await repo.getStatus(true);

  expect(await repo.getChangedFilenames())
    .to.be.an('array')
    .with.lengthOf(0);
});
});
Loading
Loading