Skip to content

Commit

Permalink
Refactor: simplify mime types and use a common source of truth everywhere (#9207)
Browse files Browse the repository at this point in the history
  • Loading branch information
Fraggle authored Dec 6, 2024
1 parent 52af1a6 commit f3acbf4
Show file tree
Hide file tree
Showing 11 changed files with 218 additions and 357 deletions.
4 changes: 2 additions & 2 deletions front/components/data_source/TableUploadOrEditModal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ import type {
WorkspaceType,
} from "@dust-tt/types";
import {
BIG_FILE_SIZE,
Err,
isBigFileSize,
isSlugified,
MAX_FILE_SIZES,
maxFileSizeToHumanReadable,
Expand Down Expand Up @@ -237,7 +237,7 @@ export const TableUploadOrEditModal = ({
name:
prev.name.length > 0 ? prev.name : stripTableName(selectedFile.name),
}));
setIsBigFile(selectedFile.size > BIG_FILE_SIZE);
setIsBigFile(isBigFileSize(selectedFile.size));
} catch (error) {
sendNotification({
type: "error",
Expand Down
4 changes: 2 additions & 2 deletions front/hooks/useFileUploaderService.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import { useSendNotification } from "@dust-tt/sparkle";
import type {
FileUploadedRequestResponseBody,
FileUploadRequestResponseBody,
FileUseCase,
FileUseCaseMetadata,
LightWorkspaceType,
Expand All @@ -19,6 +17,8 @@ import {
import { useState } from "react";

import { getMimeTypeFromFile } from "@app/lib/file";
import type { FileUploadRequestResponseBody } from "@app/pages/api/w/[wId]/files";
import type { FileUploadedRequestResponseBody } from "@app/pages/api/w/[wId]/files/[fileId]";

export interface FileBlob {
contentType: SupportedFileContentType;
Expand Down
5 changes: 1 addition & 4 deletions front/lib/api/assistant/jit_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,7 @@ function isSearchableContentType(
/**
 * Tells whether a content fragment of the given type may be listed.
 *
 * Every content type that is not an image is listable. Note that
 * `isSupportedPlainTextContentType` is not enough for this check because it is
 * limited to uploadable (as in from the conversation) content types, which
 * does not cover all non-image content types supported in the API, such as
 * `dust-application/slack`.
 *
 * @param contentType - Content type of the fragment to check.
 * @returns `true` unless `isSupportedImageContentType` accepts the type.
 */
function isListableContentType(
  contentType: SupportedContentFragmentType
): boolean {
  const isImage = isSupportedImageContentType(contentType);
  return !isImage;
}

Expand Down
189 changes: 74 additions & 115 deletions front/lib/api/files/upload.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import type {
SupportedFileContentType,
} from "@dust-tt/types";
import {
assertNever,
Err,
isTextExtractionSupportedContentType,
Ok,
Expand All @@ -28,18 +29,6 @@ import logger from "@app/logger/logger";

const UPLOAD_DELAY_AFTER_CREATION_MS = 1000 * 60 * 1; // 1 minute.

/**
 * Processing function for (content type, use case) combinations that cannot
 * be processed: it does no work and always resolves to an `Err`.
 *
 * @param _auth - Unused; kept so the function matches `ProcessingFunction`.
 * @param file - File whose `contentType` and `useCase` are named in the error.
 * @returns An `Err` wrapping an `Error` describing the unsupported combination.
 */
const notSupportedError: ProcessingFunction = async (
  _auth: Authenticator,
  file: FileResource
) => {
  const message =
    "Processing not supported for " +
    `content type ${file.contentType} and use case ${file.useCase}`;

  return new Err(new Error(message));
};

// Upload to public bucket.

const uploadToPublicBucket: ProcessingFunction = async (
Expand Down Expand Up @@ -291,121 +280,91 @@ type ProcessingFunction = (
file: FileResource
) => Promise<Result<undefined, Error>>;

/**
 * Associates every file use case with the processing function to run for it,
 * or `undefined` when there is none.
 */
type ProcessingPerUseCase = Record<
  FileUseCase,
  ProcessingFunction | undefined
>;
/**
 * Selects the processing function to run for a file, based on its content
 * type and its use case.
 *
 * Mapping implemented by the switch below:
 * - JPEG / PNG images: `resizeAndUploadToFileStorage` for "conversation",
 *   `uploadToPublicBucket` for "avatar".
 * - Word documents and PDF: `extractTextFromFileAndUpload` for "conversation"
 *   and "folder_document".
 * - Plain text and markdown: `storeRawText` for "conversation",
 *   "folder_document" and "tool_output".
 * - Slack thread attachments: `storeRawText` for "conversation" only.
 * - Delimited text (CSV / TSV variants):
 *   `extractContentAndSchemaFromDelimitedTextFiles` for "conversation" and
 *   "folder_table", `storeRawText` for "folder_document" and "tool_output".
 *
 * @param contentType - Content type of the file.
 * @param useCase - Use case the file is uploaded for.
 * @returns The matching processing function, or `undefined` when no branch
 * handles the combination.
 */
const getProcessingFunction = ({
contentType,
useCase,
}: {
contentType: SupportedFileContentType;
useCase: FileUseCase;
}): ProcessingFunction | undefined => {
switch (contentType) {
// Images: resized for conversations, published to the public bucket for avatars.
case "image/jpeg":
case "image/png":
if (useCase === "conversation") {
return resizeAndUploadToFileStorage;
} else if (useCase === "avatar") {
return uploadToPublicBucket;
}
break;

// Documents that go through text extraction.
case "application/msword":
case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
case "application/pdf":
if (useCase === "conversation" || useCase === "folder_document") {
return extractTextFromFileAndUpload;
}
break;
// Text formats stored as-is.
case "text/plain":
case "text/markdown":
if (
useCase === "conversation" ||
useCase === "folder_document" ||
useCase === "tool_output"
) {
return storeRawText;
}
break;
// Slack thread attachments are only handled for conversations.
case "text/vnd.dust.attachment.slack.thread":
if (useCase === "conversation") {
return storeRawText;
}
break;
// Delimited text: content and schema extraction for conversations and tables, raw storage otherwise.
case "text/comma-separated-values":
case "text/csv":
case "text/tab-separated-values":
case "text/tsv":
if (useCase === "conversation" || useCase === "folder_table") {
// TODO(JIT): after JIT enablement, store raw text here too, the snippet is useless
return extractContentAndSchemaFromDelimitedTextFiles;
} else if (useCase === "folder_document" || useCase === "tool_output") {
return storeRawText;
}
break;

// Presumably `assertNever` makes an unhandled member of SupportedFileContentType a compile-time
// error — confirm against its definition in @dust-tt/types.
default:
assertNever(contentType);
}

// NOTE(review): the next two lines open a `ProcessingPerContentType` mapped type in the middle of
// this function body. They look like leftovers of a removed declaration and are not part of the
// dispatch logic — confirm and remove.
type ProcessingPerContentType = {
[k in SupportedFileContentType]: ProcessingPerUseCase | undefined;
// No branch above returned: the combination is not handled.
return undefined;
};

// Table-driven dispatch: for each supported content type, the processing function to apply for
// each use case. `notSupportedError` marks the combinations that are rejected.
// NOTE(review): no closing `};` for this object literal is visible before the next declaration —
// confirm where the table is terminated.
const processingPerContentType: ProcessingPerContentType = {
"application/msword": {
conversation: extractTextFromFileAndUpload,
folder_document: extractTextFromFileAndUpload,
folder_table: notSupportedError,
avatar: notSupportedError,
tool_output: notSupportedError,
},
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": {
folder_document: extractTextFromFileAndUpload,
folder_table: notSupportedError,
conversation: extractTextFromFileAndUpload,
avatar: notSupportedError,
tool_output: notSupportedError,
},
"application/pdf": {
folder_document: extractTextFromFileAndUpload,
folder_table: notSupportedError,
conversation: extractTextFromFileAndUpload,
avatar: notSupportedError,
tool_output: notSupportedError,
},
"image/jpeg": {
conversation: resizeAndUploadToFileStorage,
folder_document: notSupportedError,
folder_table: notSupportedError,
avatar: uploadToPublicBucket,
// NOTE(review): `image/png` maps `tool_output` to `notSupportedError`, while this entry stores
// the image as raw text — looks unintentional; confirm.
tool_output: storeRawText,
},
"image/png": {
conversation: resizeAndUploadToFileStorage,
folder_document: notSupportedError,
folder_table: notSupportedError,
avatar: uploadToPublicBucket,
tool_output: notSupportedError,
},
"text/comma-separated-values": {
conversation: extractContentAndSchemaFromDelimitedTextFiles,
folder_document: storeRawText,
folder_table: extractContentAndSchemaFromDelimitedTextFiles,
avatar: notSupportedError,
tool_output: storeRawText,
},
"text/csv": {
conversation: extractContentAndSchemaFromDelimitedTextFiles,
folder_document: storeRawText,
folder_table: extractContentAndSchemaFromDelimitedTextFiles,
avatar: notSupportedError,
tool_output: storeRawText,
},
"text/markdown": {
conversation: storeRawText,
folder_document: storeRawText,
folder_table: notSupportedError,
avatar: notSupportedError,
tool_output: storeRawText,
},
"text/plain": {
conversation: storeRawText,
folder_document: storeRawText,
folder_table: notSupportedError,
avatar: notSupportedError,
tool_output: storeRawText,
},
"text/tab-separated-values": {
conversation: extractContentAndSchemaFromDelimitedTextFiles,
folder_document: storeRawText,
folder_table: extractContentAndSchemaFromDelimitedTextFiles,
avatar: notSupportedError,
tool_output: storeRawText,
},
"text/tsv": {
conversation: extractContentAndSchemaFromDelimitedTextFiles,
folder_document: storeRawText,
folder_table: extractContentAndSchemaFromDelimitedTextFiles,
avatar: notSupportedError,
tool_output: storeRawText,
},
"text/vnd.dust.attachment.slack.thread": {
conversation: storeRawText,
folder_document: notSupportedError,
folder_table: notSupportedError,
avatar: notSupportedError,
tool_output: notSupportedError,
},
/**
 * Tells whether a file with the given content type can be uploaded for the
 * given use case, i.e. whether `getProcessingFunction` yields a processing
 * function for that combination.
 *
 * @param arg - The content type / use case pair to check.
 * @returns `true` when a processing function exists, `false` otherwise.
 */
export const isUploadSupported = (arg: {
  contentType: SupportedFileContentType;
  useCase: FileUseCase;
}): boolean => Boolean(getProcessingFunction(arg));

// Runs the processing step for `file` and reports the outcome as a Result.
// NOTE(review): two variants of the body are interleaved below. One looks up
// `processingPerContentType[file.contentType]` and then `[file.useCase]`, falling back to
// `Ok(undefined)` when nothing is found; the other calls `getProcessingFunction(file)` and returns
// an `Err` when nothing matches. `processing` and `res` are each declared twice and the braces do
// not balance, so this looks like unresolved diff residue — confirm which variant is intended.
const maybeApplyProcessing: ProcessingFunction = async (
auth: Authenticator,
file: FileResource
) => {
// Table-lookup variant: an unknown content type is silently accepted.
const contentTypeProcessing = processingPerContentType[file.contentType];
if (!contentTypeProcessing) {
return new Ok(undefined);
// getProcessingFunction variant: an unsupported combination is reported as an error.
const processing = getProcessingFunction(file);
if (!processing) {
return new Err(
new Error(
`Processing not supported for content type ${file.contentType} and use case ${file.useCase}`
)
);
}

// Table-lookup variant: processing only runs when the use case has an entry.
const processing = contentTypeProcessing[file.useCase];
if (processing) {
const res = await processing(auth, file);
if (res.isErr()) {
return res;
} else {
return new Ok(undefined);
}
// getProcessingFunction variant: run the function; propagate its error, otherwise succeed.
const res = await processing(auth, file);
if (res.isErr()) {
return res;
} else {
return new Ok(undefined);
}

return new Ok(undefined);
};

export async function processAndStoreFile(
Expand Down
Loading

0 comments on commit f3acbf4

Please sign in to comment.