fix(tables query): use a limit in number of chars for the CSV snippet (#8215)

Co-authored-by: Henry Fontanier <[email protected]>
fontanierh and Henry Fontanier authored Oct 24, 2024
1 parent 27d5345 commit 4276b64
Showing 1 changed file with 52 additions and 16 deletions.
front/lib/api/assistant/actions/tables_query.ts (68 changes: 52 additions & 16 deletions)
@@ -38,9 +38,8 @@ import logger from "@app/logger/logger";
 const TABLES_QUERY_MIN_TOKEN = 28_000;
 const RENDERED_CONVERSATION_MIN_TOKEN = 4_000;
 
-// Max number of lines from the CSV ouptut file that will be saved in the DB
-// and inlined in the rendered conversation.
-const MAX_SNIPPET_LINES_QUERY_RESULT_FILE = 128;
+// Max number of characters in the snippet.
+const MAX_SNIPPET_CHARS = 16384;
 
 interface TablesQueryActionBlob {
   id: ModelId; // AgentTablesQueryAction.
@@ -604,18 +603,18 @@ async function getTablesQueryOutputCsvFileAndSnippet(
   snippet: string;
 }> {
   const toCsv = (
-    records: Array<Record<string, string | number | boolean>>
+    records: Array<Record<string, string | number | boolean>>,
+    options: { header: boolean } = { header: true }
   ): Promise<string> => {
     return new Promise((resolve, reject) => {
-      stringify(records, { header: true }, (err, data) => {
+      stringify(records, options, (err, data) => {
         if (err) {
           reject(err);
         }
         resolve(data);
       });
     });
   };
-
   const csvOutput = await toCsv(results);
 
   const file = await internalCreateToolOutputCsvFile(auth, {
@@ -624,17 +623,54 @@ async function getTablesQueryOutputCsvFileAndSnippet(
     contentType: "text/csv",
   });
 
-  let snippetOuptut = `TOTAL_LINES: ${results.length}\n`;
-  if (results.length > MAX_SNIPPET_LINES_QUERY_RESULT_FILE) {
-    snippetOuptut += `Showing the first ${MAX_SNIPPET_LINES_QUERY_RESULT_FILE} lines of the results.\n\n`;
-    snippetOuptut += await toCsv(
-      results.slice(0, MAX_SNIPPET_LINES_QUERY_RESULT_FILE)
-    );
-    snippetOuptut += `\n... (${results.length - MAX_SNIPPET_LINES_QUERY_RESULT_FILE} lines omitted)\n`;
+  if (results.length === 0) {
+    return { file, snippet: "TOTAL_LINES: 0\n(empty result set)\n" };
+  }
+
+  let snippetOutput = `TOTAL_LINES: ${results.length}\n`;
+  let currentCharCount = snippetOutput.length;
+  let linesIncluded = 0;
+
+  const truncationString = "(...truncated)";
+  const endOfSnippetString = (omitted: number) =>
+    omitted > 0 ? `\n(${omitted} lines omitted)\n` : "\n(end of file)\n";
+
+  // Process header
+  const header = csvOutput.split("\n")[0];
+  if (currentCharCount + header.length + 1 <= MAX_SNIPPET_CHARS) {
+    snippetOutput += header + "\n";
+    currentCharCount += header.length + 1;
   } else {
-    snippetOuptut += await toCsv(results);
-    snippetOuptut += `\n(end of file)\n`;
+    const remainingChars =
+      MAX_SNIPPET_CHARS - currentCharCount - truncationString.length;
+    if (remainingChars > 0) {
+      snippetOutput += header.slice(0, remainingChars) + truncationString;
+    }
+    snippetOutput += endOfSnippetString(results.length);
+    return { file, snippet: snippetOutput };
   }
 
+  // Process data rows
+  for (const row of results) {
+    const rowCsv = await toCsv([row], { header: false });
+    const trimmedRowCsv = rowCsv.trim(); // Remove trailing newline
+    if (currentCharCount + trimmedRowCsv.length + 1 <= MAX_SNIPPET_CHARS) {
+      snippetOutput += trimmedRowCsv + "\n";
+      currentCharCount += trimmedRowCsv.length + 1;
+      linesIncluded++;
+    } else {
+      const remainingChars =
+        MAX_SNIPPET_CHARS - currentCharCount - truncationString.length;
+      if (remainingChars > 0) {
+        snippetOutput +=
+          trimmedRowCsv.slice(0, remainingChars) + truncationString;
+        linesIncluded++;
+      }
+      break;
+    }
+  }
+
-  return { file, snippet: snippetOuptut };
+  const linesOmitted = results.length - linesIncluded;
+  snippetOutput += endOfSnippetString(linesOmitted);
+  return { file, snippet: snippetOutput };
 }
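For readers unfamiliar with the helper changed in the second hunk: toCsv wraps the callback API of the csv-stringify package in a Promise, and the new options parameter is what lets the snippet code render a single row without a header. The sketch below is illustrative only; the demo function and its sample rows are invented for this example, and the real helper lives inside getTablesQueryOutputCsvFileAndSnippet as shown above.

import { stringify } from "csv-stringify";

// Promise wrapper around csv-stringify's callback API, mirroring the toCsv
// helper in the diff. Passing { header: false } renders records without a
// header row, which is how the new code emits one data row at a time.
function toCsv(
  records: Array<Record<string, string | number | boolean>>,
  options: { header: boolean } = { header: true }
): Promise<string> {
  return new Promise((resolve, reject) => {
    stringify(records, options, (err, data) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(data ?? "");
    });
  });
}

// Invented usage example: header once, then a row without a header.
async function demo(): Promise<void> {
  const rows = [
    { city: "Paris", population: 2102650 },
    { city: "Lyon", population: 522228 },
  ];
  const headerAndFirstRow = await toCsv([rows[0]]); // header row + first record
  const secondRowOnly = await toCsv([rows[1]], { header: false }); // record only
  console.log(headerAndFirstRow.trim(), "|", secondRowOnly.trim());
}

void demo();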
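And for the third hunk, a minimal, dependency-free sketch of the character-budget idea the new code implements. Everything here (buildSnippet, MAX_CHARS, the naive comma join) is a hypothetical stand-in for illustration; the committed code above uses csv-stringify for proper quoting and caps the snippet at MAX_SNIPPET_CHARS = 16384 characters.

// Hypothetical, simplified sketch of the character-budget approach. The naive
// join below does not handle quoting or escaping; it only illustrates how the
// budget is spent line by line.
const MAX_CHARS = 16384;
const TRUNCATION = "(...truncated)";

function toCsvLine(values: Array<string | number | boolean>): string {
  return values.map(String).join(",");
}

function buildSnippet(
  rows: Array<Record<string, string | number | boolean>>
): string {
  if (rows.length === 0) {
    return "TOTAL_LINES: 0\n(empty result set)\n";
  }

  let out = `TOTAL_LINES: ${rows.length}\n`;
  let used = out.length;
  let dataRowsIncluded = 0;

  // Header derived from the keys of the first record, then one line per row.
  const lines = [
    toCsvLine(Object.keys(rows[0])),
    ...rows.map((r) => toCsvLine(Object.values(r))),
  ];

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    if (used + line.length + 1 <= MAX_CHARS) {
      out += line + "\n";
      used += line.length + 1;
      if (i > 0) {
        dataRowsIncluded++;
      }
    } else {
      // Not enough budget for the whole line: keep a prefix if any room is left.
      const remaining = MAX_CHARS - used - TRUNCATION.length;
      if (remaining > 0) {
        out += line.slice(0, remaining) + TRUNCATION;
        if (i > 0) {
          dataRowsIncluded++;
        }
      }
      break;
    }
  }

  const omitted = rows.length - dataRowsIncluded;
  return out + (omitted > 0 ? `\n(${omitted} lines omitted)\n` : "\n(end of file)\n");
}

// Example: with the default budget, two short rows fit and the snippet ends
// with "(end of file)"; shrink MAX_CHARS to see the truncation markers instead.
console.log(
  buildSnippet([
    { city: "Paris", population: 2102650 },
    { city: "Lyon", population: 522228 },
  ])
);

Compared with the old cap of MAX_SNIPPET_LINES_QUERY_RESULT_FILE = 128 lines, a character budget keeps the snippet size bounded even when individual rows are very wide, which is what the commit title refers to.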
