From 722f5d8dde2867b8b8bedd7444f165d38e418abb Mon Sep 17 00:00:00 2001 From: Thomas Draier Date: Fri, 13 Dec 2024 09:05:22 +0100 Subject: [PATCH] [front] Restore search endpoint with proper filtering (#9313) --- front/lib/api/data_sources.ts | 12 + .../data_source_views/[dsvId]/search.ts | 243 ++++++++++++++++++ front/public/swagger.json | 199 ++++++++++++++ types/src/front/lib/core_api.ts | 2 + 4 files changed, 456 insertions(+) create mode 100644 front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_source_views/[dsvId]/search.ts diff --git a/front/lib/api/data_sources.ts b/front/lib/api/data_sources.ts index 67e151311141..e3db27e54693 100644 --- a/front/lib/api/data_sources.ts +++ b/front/lib/api/data_sources.ts @@ -535,9 +535,11 @@ export async function upsertTable({ export async function handleDataSourceSearch({ searchQuery, dataSource, + dataSourceView, }: { searchQuery: DataSourceSearchQuery; dataSource: DataSourceResource; + dataSourceView?: DataSourceViewResource; }): Promise< Result< DataSourceSearchResponseType, @@ -570,6 +572,16 @@ export async function handleDataSourceSearch({ lt: searchQuery.timestamp_lt ?? null, }, }, + view_filter: dataSourceView + ? 
{ + parents: { + in: dataSourceView.parentsIn, + not: [], + }, + tags: null, + timestamp: null, + } + : undefined, credentials: credentials, } ); diff --git a/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_source_views/[dsvId]/search.ts b/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_source_views/[dsvId]/search.ts new file mode 100644 index 000000000000..14d75398bd95 --- /dev/null +++ b/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_source_views/[dsvId]/search.ts @@ -0,0 +1,243 @@ +import type { DataSourceSearchResponseType } from "@dust-tt/client"; +import { DataSourceSearchQuerySchema } from "@dust-tt/client"; +import type { WithAPIErrorResponse } from "@dust-tt/types"; +import { assertNever } from "@dust-tt/types"; +import type { NextApiRequest, NextApiResponse } from "next"; +import { fromError } from "zod-validation-error"; + +import { withPublicAPIAuthentication } from "@app/lib/api/auth_wrappers"; +import { handleDataSourceSearch } from "@app/lib/api/data_sources"; +import { withResourceFetchingFromRoute } from "@app/lib/api/resource_wrappers"; +import type { Authenticator } from "@app/lib/auth"; +import { DataSourceViewResource } from "@app/lib/resources/data_source_view_resource"; +import type { SpaceResource } from "@app/lib/resources/space_resource"; +import { apiError } from "@app/logger/withlogging"; + +/** + * @swagger + * /api/v1/w/{wId}/spaces/{spaceId}/data_source_views/{dsvId}/search: + * get: + * summary: Search the data source view + * description: Search the data source view identified by {dsvId} in the workspace identified by {wId}. 
+ * tags: + * - DatasourceViews + * security: + * - BearerAuth: [] + * parameters: + * - in: path + * name: wId + * required: true + * description: ID of the workspace + * schema: + * type: string + * - in: path + * name: spaceId + * required: true + * description: ID of the space + * schema: + * type: string + * - in: path + * name: dsvId + * required: true + * description: ID of the data source view + * schema: + * type: string + * - in: query + * name: query + * required: true + * description: The search query + * schema: + * type: string + * - in: query + * name: top_k + * required: true + * description: The number of results to return + * schema: + * type: number + * - in: query + * name: full_text + * required: true + * description: Whether to return the full document content + * schema: + * type: boolean + * - in: query + * name: target_document_tokens + * required: false + * description: The number of tokens in the target document + * schema: + * type: number + * - in: query + * name: timestamp_gt + * required: false + * description: The timestamp to filter by + * schema: + * type: number + * - in: query + * name: timestamp_lt + * required: false + * description: The timestamp to filter by + * schema: + * type: number + * - in: query + * name: tags_in + * required: false + * description: The tags to filter by + * schema: + * type: string + * - in: query + * name: tags_not + * required: false + * description: The tags to filter by + * schema: + * type: string + * - in: query + * name: parents_in + * required: false + * description: The parents to filter by + * schema: + * type: string + * - in: query + * name: parents_not + * required: false + * description: The parents to filter by + * schema: + * type: string + * responses: + * 200: + * description: The documents + * content: + * application/json: + * schema: + * type: object + * properties: + * documents: + * type: array + * items: + * type: object + * properties: + * id: + * type: string + * description: 
ID of the document + * title: + * type: string + * description: Title of the document + * content: + * type: string + * description: Content of the document + * tags: + * type: array + * items: + * type: string + * description: Tags of the document + * parents: + * type: array + * items: + * type: string + * description: Parents of the document + * timestamp: + * type: number + * description: Timestamp of the document + * data: + * type: object + * description: Data of the document + * score: + * type: number + * description: Score of the document + * 400: + * description: Invalid request error + * 405: + * description: Method not supported error + */
+// Route handler for the data source view search endpoint.
+//
+// Validates the `dsvId` path parameter and the search query parameters, then runs the search
+// against the view's underlying data source, passing the view along so that
+// `handleDataSourceSearch` can take it into account.
+//
+// Responses written by this handler:
+//   - 200 with the search result on success,
+//   - 400 when `dsvId` or the query parameters are invalid, or when the search itself fails,
+//   - 404 when the view does not exist or does not belong to `space`,
+//   - 405 for any method other than GET.
+//
+// `auth` and `space` are presumably provided by the wrappers used in the default export below.
+async function handler(
+  req: NextApiRequest,
+  res: NextApiResponse<WithAPIErrorResponse<DataSourceSearchResponseType>>,
+  auth: Authenticator,
+  space: SpaceResource
+): Promise<void> {
+  const { dsvId } = req.query;
+  if (typeof dsvId !== "string") {
+    return apiError(req, res, {
+      status_code: 400,
+      api_error: {
+        type: "invalid_request_error",
+        message: "Invalid path parameters.",
+      },
+    });
+  }
+
+  // A view that exists but lives in another space is reported as not found.
+  const dataSourceView = await DataSourceViewResource.fetchById(auth, dsvId);
+  if (!dataSourceView || dataSourceView.space.sId !== space.sId) {
+    // NOTE(review): the error type and message name the data source although a data source
+    // view is looked up; left unchanged because API clients may match on them.
+    return apiError(req, res, {
+      status_code: 404,
+      api_error: {
+        type: "data_source_not_found",
+        message: "The data source you requested was not found.",
+      },
+    });
+  }
+
+  switch (req.method) {
+    case "GET": {
+      // A list-valued query parameter that is passed only once arrives as a plain string
+      // instead of an array, so wrap it before validating. Empty strings are left untouched.
+      for (const key of ["tags_in", "tags_not", "parents_in", "parents_not"]) {
+        const value = req.query[key];
+        if (typeof value === "string" && value.length > 0) {
+          req.query[key] = [value];
+        }
+      }
+
+      const parsed = DataSourceSearchQuerySchema.safeParse(req.query);
+      if (!parsed.success) {
+        return apiError(req, res, {
+          status_code: 400,
+          api_error: {
+            type: "invalid_request_error",
+            message: fromError(parsed.error).toString(),
+          },
+        });
+      }
+
+      const searchResult = await handleDataSourceSearch({
+        searchQuery: parsed.data,
+        dataSource: dataSourceView.dataSource,
+        dataSourceView,
+      });
+      if (searchResult.isErr()) {
+        switch (searchResult.error.code) {
+          case "data_source_error":
+            return apiError(req, res, {
+              status_code: 400,
+              api_error: {
+                type: "data_source_error",
+                message: searchResult.error.message,
+              },
+            });
+          default:
+            // Compile-time exhaustiveness check on the error codes.
+            assertNever(searchResult.error.code);
+        }
+      }
+
+      return res.status(200).json(searchResult.value);
+    }
+
+    default:
+      return apiError(req, res, {
+        status_code: 405,
+        api_error: {
+          type: "method_not_supported_error",
+          message: "The method passed is not supported, GET is expected.",
+        },
+      });
+  }
+}
+
+export default withPublicAPIAuthentication(
+  withResourceFetchingFromRoute(handler, "space")
+);
diff --git a/front/public/swagger.json b/front/public/swagger.json index 47f72640e807..7a8a858239bc 100644 --- a/front/public/swagger.json +++ b/front/public/swagger.json @@ -1570,6 +1570,205 @@ } } }, + "/api/v1/w/{wId}/spaces/{spaceId}/data_source_views/{dsvId}/search": { + "get": { + "summary": "Search the data source view", + "description": "Search the data source view identified by {dsvId} in the workspace identified by {wId}.", + "tags": [ + "DatasourceViews" + ], + "security": [ + { + "BearerAuth": [] 
+ } + ], + "parameters": [ + { + "in": "path", + "name": "wId", + "required": true, + "description": "ID of the workspace", + "schema": { + "type": "string" + } + }, + { + "in": "path", + "name": "spaceId", + "required": true, + "description": "ID of the space", + "schema": { + "type": "string" + } + }, + { + "in": "path", + "name": "dsvId", + "required": true, + "description": "ID of the data source view", + "schema": { + "type": "string" + } + }, + { + "in": "query", + "name": "query", + "required": true, + "description": "The search query", + "schema": { + "type": "string" + } + }, + { + "in": "query", + "name": "top_k", + "required": true, + "description": "The number of results to return", + "schema": { + "type": "number" + } + }, + { + "in": "query", + "name": "full_text", + "required": true, + "description": "Whether to return the full document content", + "schema": { + "type": "boolean" + } + }, + { + "in": "query", + "name": "target_document_tokens", + "required": false, + "description": "The number of tokens in the target document", + "schema": { + "type": "number" + } + }, + { + "in": "query", + "name": "timestamp_gt", + "required": false, + "description": "The timestamp to filter by", + "schema": { + "type": "number" + } + }, + { + "in": "query", + "name": "timestamp_lt", + "required": false, + "description": "The timestamp to filter by", + "schema": { + "type": "number" + } + }, + { + "in": "query", + "name": "tags_in", + "required": false, + "description": "The tags to filter by", + "schema": { + "type": "string" + } + }, + { + "in": "query", + "name": "tags_not", + "required": false, + "description": "The tags to filter by", + "schema": { + "type": "string" + } + }, + { + "in": "query", + "name": "parents_in", + "required": false, + "description": "The parents to filter by", + "schema": { + "type": "string" + } + }, + { + "in": "query", + "name": "parents_not", + "required": false, + "description": "The parents to filter by", + "schema": { + "type": 
"string" + } + } + ], + "responses": { + "200": { + "description": "The documents", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "documents": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "ID of the document" + }, + "title": { + "type": "string", + "description": "Title of the document" + }, + "content": { + "type": "string", + "description": "Content of the document" + }, + "tags": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Tags of the document" + }, + "parents": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Parents of the document" + }, + "timestamp": { + "type": "number", + "description": "Timestamp of the document" + }, + "data": { + "type": "object", + "description": "Data of the document" + }, + "score": { + "type": "number", + "description": "Score of the document" + } + } + } + } + } + } + } + } + }, + "400": { + "description": "Invalid request error" + }, + "405": { + "description": "Method not supported error" + } + } + } + }, "/api/v1/w/{wId}/spaces/{spaceId}/data_source_views": { "get": { "summary": "List Data Source Views", diff --git a/types/src/front/lib/core_api.ts b/types/src/front/lib/core_api.ts index 334af63aab6b..21f8943e51b3 100644 --- a/types/src/front/lib/core_api.ts +++ b/types/src/front/lib/core_api.ts @@ -654,6 +654,7 @@ export class CoreAPI { query: string; topK: number; filter?: CoreAPISearchFilter | null; + view_filter?: CoreAPISearchFilter | null; fullText: boolean; credentials: { [key: string]: string }; target_document_tokens?: number | null; @@ -672,6 +673,7 @@ export class CoreAPI { query: payload.query, top_k: payload.topK, filter: payload.filter, + view_filter: payload.view_filter, full_text: payload.fullText, credentials: payload.credentials, target_document_tokens: payload.target_document_tokens,