Skip to content

Commit

Permalink
Improve data source/data source view deletion (#7688)
Browse files Browse the repository at this point in the history
* Use paranoid model for DataSource

* Add deletedAt column

* Refactor delete data source

* Add paranoid on data source view

* Soft delete data source view

* Migration

* ✨

* Fix destroy path

* Account for soft deleted resources

* Migration

* Implement our own soft deletion

* ✨

* ✂️

* 🙈

* 👕

* Migration

* ✨

* Fix typo

* Fix migration before conflict

* Remove destroy, augment delete

* 😰

* ✨

* Address comments from review

* 🙈

* Rename migration

* 📖

* Fix migration name

* ✂️
  • Loading branch information
flvndvd authored Oct 3, 2024
1 parent b806c07 commit fc9e0cd
Show file tree
Hide file tree
Showing 38 changed files with 750 additions and 503 deletions.
110 changes: 0 additions & 110 deletions front/admin/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@ import {
SUPPORTED_MODEL_CONFIGS,
} from "@dust-tt/types";
import { CoreAPI } from "@dust-tt/types";
import { Storage } from "@google-cloud/storage";
import parseArgs from "minimist";
import readline from "readline";

import { getConversation } from "@app/lib/api/assistant/conversation";
import {
Expand Down Expand Up @@ -265,114 +263,6 @@ const user = async (command: string, args: parseArgs.ParsedArgs) => {

const dataSource = async (command: string, args: parseArgs.ParsedArgs) => {
switch (command) {
case "delete": {
if (!args.wId) {
throw new Error("Missing --wId argument");
}
if (!args.dsId) {
throw new Error("Missing --dsId argument");
}

const auth = await Authenticator.internalAdminForWorkspace(args.wId);

const dataSource = await DataSourceResource.fetchById(auth, args.dsId);
if (!dataSource) {
throw new Error(
`DataSource not found: wId='${args.wId}' dsId='${args.dsId}'`
);
}

const dustAPIProjectId = dataSource.dustAPIProjectId;

await new Promise((resolve) => {
const rl = readline.createInterface({
input: process.stdin,
output: process.stdout,
});
rl.question(
`Are you sure you want to definitely delete the following data source and all associated data: wId='${args.wId}' dsId='${args.dsId}' provider='${dataSource.connectorProvider}'? (y/N) `,
(answer: string) => {
rl.close();
if (answer !== "y") {
throw new Error("Aborting");
}
resolve(null);
}
);
});

if (dataSource.connectorId) {
console.log(`Deleting connectorId=${dataSource.connectorId}}`);
const connDeleteRes = await new ConnectorsAPI(
config.getConnectorsAPIConfig(),
logger
).deleteConnector(dataSource.connectorId.toString(), true);
if (connDeleteRes.isErr()) {
throw new Error(connDeleteRes.error.message);
}
}
const coreAPI = new CoreAPI(config.getCoreAPIConfig(), logger);

const coreDeleteRes = await coreAPI.deleteDataSource({
projectId: dataSource.dustAPIProjectId,
dataSourceId: dataSource.dustAPIDataSourceId,
});
if (coreDeleteRes.isErr()) {
throw new Error(coreDeleteRes.error.message);
}

await dataSource.delete(auth);

console.log("Data source deleted. Make sure to run: \n\n");
console.log(
"\x1b[32m%s\x1b[0m",
`./admin/cli.sh data-source scrub --dustAPIProjectId ${dustAPIProjectId}`
);
console.log(
"\n\n...to fully scrub the customer data from our infra (GCS clean-up)."
);
console.log(`WARNING: For Github datasource, the user may want to uninstall the app from Github
to revoke the authorization. If needed, send an email (cf template in lib/email.ts) `);
return;
}

case "scrub": {
if (!args.dustAPIProjectId) {
throw new Error("Missing --dustAPIProjectId argument");
}

const storage = new Storage({ keyFilename: config.getServiceAccount() });

const [files] = await storage
.bucket(config.getDustDataSourcesBucket())
.getFiles({ prefix: `${args.dustAPIProjectId}` });

console.log(`Chunking ${files.length} files...`);
const chunkSize = 32;
const chunks = [];
for (let i = 0; i < files.length; i += chunkSize) {
chunks.push(files.slice(i, i + chunkSize));
}

for (let i = 0; i < chunks.length; i++) {
console.log(`Processing chunk ${i}/${chunks.length}...`);
const chunk = chunks[i];
if (!chunk) {
continue;
}
await Promise.all(
chunk.map((f) => {
return (async () => {
console.log(`Deleting file: ${f.name}`);
await f.delete();
})();
})
);
}

return;
}

case "delete-document": {
if (!args.wId) {
throw new Error("Missing --wId argument");
Expand Down
35 changes: 28 additions & 7 deletions front/lib/api/data_sources.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,10 @@ export async function getDataSources(
});
}

export async function deleteDataSource(
/**
* Soft delete a data source. This will mark the data source as deleted and will trigger a scrubbing.
*/
export async function softDeleteDataSourceAndLaunchScrubWorkflow(
auth: Authenticator,
dataSource: DataSourceResource,
transaction?: Transaction
Expand All @@ -70,8 +73,30 @@ export async function deleteDataSource(
});
}

const dustAPIProjectId = dataSource.dustAPIProjectId;
await dataSource.delete(auth, { transaction, hardDelete: false });

// The scrubbing workflow will delete associated resources and hard delete the data source.
await launchScrubDataSourceWorkflow(owner, dataSource);

return new Ok(dataSource.toJSON());
}

/**
* Performs a hard deletion of the specified data source, ensuring complete removal of the data
* source and all its associated resources, including any existing connectors.
*/
export async function hardDeleteDataSource(
auth: Authenticator,
dataSource: DataSourceResource
) {
if (!auth.isBuilder()) {
return new Err({
code: "unauthorized_deletion",
message: "Only builders can destroy data sources.",
});
}

const { dustAPIProjectId } = dataSource;
if (dataSource.connectorId && dataSource.connectorProvider) {
if (
!MANAGED_DS_DELETABLE.includes(dataSource.connectorProvider) &&
Expand Down Expand Up @@ -119,12 +144,8 @@ export async function deleteDataSource(
}
}

await dataSource.delete(auth, transaction);
await dataSource.delete(auth, { hardDelete: true });

await launchScrubDataSourceWorkflow({
wId: owner.sId,
dustAPIProjectId,
});
if (dataSource.connectorProvider) {
await warnPostDeletion(auth, dataSource.connectorProvider);
}
Expand Down
14 changes: 9 additions & 5 deletions front/lib/api/vaults.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import type { DataSourceWithAgentsUsageType } from "@dust-tt/types";
import { uniq } from "lodash";

import { deleteDataSource } from "@app/lib/api/data_sources";
import { softDeleteDataSourceAndLaunchScrubWorkflow } from "@app/lib/api/data_sources";
import type { Authenticator } from "@app/lib/auth";
import { DataSourceResource } from "@app/lib/resources/data_source_resource";
import { DataSourceViewResource } from "@app/lib/resources/data_source_view_resource";
Expand Down Expand Up @@ -51,29 +51,33 @@ export const deleteVault = async (
await frontSequelize.transaction(async (t) => {
// delete all data source views
for (const view of dataSourceViews) {
const res = await view.delete(auth, t);
// Soft delete view, they will be hard deleted when the data source scrubbing job runs.
const res = await view.delete(auth, {
transaction: t,
hardDelete: false,
});
if (res.isErr()) {
throw res.error;
}
}

for (const ds of dataSources) {
const res = await deleteDataSource(auth, ds, t);
const res = await softDeleteDataSourceAndLaunchScrubWorkflow(auth, ds, t);
if (res.isErr()) {
throw res.error;
}
}

// delete all vaults groups
for (const group of vault.groups) {
const res = await group.delete(auth, t);
const res = await group.delete(auth, { transaction: t });
if (res.isErr()) {
throw res.error;
}
}

// Finally, delete the vault
const res = await vault.delete(auth, t);
const res = await vault.delete(auth, { transaction: t });
if (res.isErr()) {
throw res.error;
}
Expand Down
68 changes: 46 additions & 22 deletions front/lib/resources/app_resource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -150,29 +150,53 @@ export class AppResource extends ResourceWithVault<App> {

// Deletion.

async delete(auth: Authenticator): Promise<Result<undefined, Error>> {
try {
await frontSequelize.transaction(async (t) => {
await RunResource.deleteAllByAppId(this.id, t);
await Clone.destroy({
where: {
[Op.or]: [{ fromId: this.id }, { toId: this.id }],
},
transaction: t,
});
const res = await DatasetResource.deleteForApp(auth, this, t);
if (res.isErr()) {
// Interrupt the transaction if there was an error deleting datasets.
throw res.error;
}
await this.model.destroy({
where: {
workspaceId: auth.getNonNullableWorkspace().id,
id: this.id,
},
transaction: t,
});
protected async hardDelete(
auth: Authenticator
): Promise<Result<number, Error>> {
const deletedCount = await frontSequelize.transaction(async (t) => {
await RunResource.deleteAllByAppId(this.id, t);

await Clone.destroy({
where: {
[Op.or]: [{ fromId: this.id }, { toId: this.id }],
},
transaction: t,
});
const res = await DatasetResource.deleteForApp(auth, this, t);
if (res.isErr()) {
// Interrupt the transaction if there was an error deleting datasets.
throw res.error;
}

return App.destroy({
where: {
workspaceId: auth.getNonNullableWorkspace().id,
id: this.id,
},
transaction: t,
// Use 'hardDelete: true' to ensure the record is permanently deleted from the database,
// bypassing the soft deletion in place.
hardDelete: true,
});
});

return new Ok(deletedCount);
}

// TODO(2024-09-27 flav): Implement soft delete of apps.
protected softDelete(): Promise<Result<number, Error>> {
throw new Error("Method not implemented.");
}

// TODO(2024-09-27 flav): Implement soft delete of apps.
async delete(
auth: Authenticator,
// eslint-disable-next-line @typescript-eslint/no-unused-vars
{ hardDelete }: { hardDelete: true }
): Promise<Result<undefined, Error>> {
try {
await this.hardDelete(auth);

return new Ok(undefined);
} catch (err) {
return new Err(err as Error);
Expand Down
4 changes: 2 additions & 2 deletions front/lib/resources/base_resource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,6 @@ export abstract class BaseResource<M extends Model & ResourceWithId> {

abstract delete(
auth: Authenticator,
transaction?: Transaction
): Promise<Result<undefined, Error>>;
{ transaction }: { transaction?: Transaction }
): Promise<Result<undefined | number, Error>>;
}
7 changes: 1 addition & 6 deletions front/lib/resources/content_fragment_resource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,12 +104,7 @@ export class ContentFragmentResource extends BaseResource<ContentFragmentModel>
* Temporary workaround until we can call this method from the MessageResource.
* @deprecated use the destroy method.
*/
delete(
// eslint-disable-next-line @typescript-eslint/no-unused-vars
auth: Authenticator,
// eslint-disable-next-line @typescript-eslint/no-unused-vars
transaction?: Transaction
): Promise<Result<undefined, Error>> {
delete(): Promise<Result<undefined, Error>> {
throw new Error("Method not implemented.");
}

Expand Down
Loading

0 comments on commit fc9e0cd

Please sign in to comment.