From bae9884029f1f8ff37ab34ca47cfa0aa859a2312 Mon Sep 17 00:00:00 2001
From: Philippe Rolet
Date: Thu, 28 Nov 2024 13:52:01 +0100
Subject: [PATCH] [Webcrawler] Avoid null bytes in tags (#8993)

Description
---
Fixes issue from [thread](https://dust4ai.slack.com/archives/C05F84CFP0E/p1732793680391139) (in addition to PR #8869)

Risks
---
na

Deploy
---
connectors
---
 connectors/src/connectors/webcrawler/temporal/activities.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/connectors/src/connectors/webcrawler/temporal/activities.ts b/connectors/src/connectors/webcrawler/temporal/activities.ts
index 9e8e3e6307f6..5d4e419c39fe 100644
--- a/connectors/src/connectors/webcrawler/temporal/activities.ts
+++ b/connectors/src/connectors/webcrawler/temporal/activities.ts
@@ -327,7 +327,7 @@ export async function crawlWebsiteByConnectorId(connectorId: ModelId) {
             }),
             documentUrl: request.url,
             timestampMs: new Date().getTime(),
-            tags: [`title:${pageTitle}`],
+            tags: [`title:${stripNullBytes(pageTitle)}`],
             parents: getParentsForPage(request.url, false),
             upsertContext: {
               sync_type: "batch",