diff --git a/core/src/data_sources/data_source.rs b/core/src/data_sources/data_source.rs index 812c12204eb0..5c30b8bb0551 100644 --- a/core/src/data_sources/data_source.rs +++ b/core/src/data_sources/data_source.rs @@ -25,7 +25,7 @@ use std::collections::HashMap; use std::fmt; use std::str::FromStr; use tokio_stream::{self as stream}; -use tracing::{error, info}; +use tracing::{error, info, warn}; use uuid::Uuid; /// Section is used to represent the structure of document to be taken into account during chunking. @@ -604,6 +604,22 @@ impl DataSource { ))?; } + if parents.is_empty() { + warn!( + document_id = document_id, + timestamp = ?timestamp, + parents = ?parents, + "Upserting a document without any parent" + ); + } else if parents[0] != document_id { + warn!( + document_id = document_id, + timestamp = ?timestamp, + parents = ?parents, + "Upserting a document that is not self-referenced as its parent" + ); + } + let store = store.clone(); let current_system_tags = if preserve_system_tags { diff --git a/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/documents/[documentId]/index.ts b/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/documents/[documentId]/index.ts index bef1409950c3..89cc947fdde3 100644 --- a/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/documents/[documentId]/index.ts +++ b/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/documents/[documentId]/index.ts @@ -5,9 +5,12 @@ import type { } from "@dust-tt/client"; import { PostDataSourceDocumentRequestSchema } from "@dust-tt/client"; import type { WithAPIErrorResponse } from "@dust-tt/types"; -import { rateLimiter, sectionFullText } from "@dust-tt/types"; -import { dustManagedCredentials } from "@dust-tt/types"; -import { CoreAPI } from "@dust-tt/types"; +import { + CoreAPI, + dustManagedCredentials, + rateLimiter, + sectionFullText, +} from "@dust-tt/types"; import type { NextApiRequest, NextApiResponse } from "next"; import { withPublicAPIAuthentication } from "@app/lib/api/auth_wrappers"; @@ -22,7 +25,7 @@ import { } from "@app/lib/upsert_queue"; import { validateUrl } from "@app/lib/utils"; import logger from "@app/logger/logger"; -import { apiError } from "@app/logger/withlogging"; +import { apiError, statsDClient } from "@app/logger/withlogging"; import { launchRunPostDeleteHooksWorkflow } from "@app/temporal/documents_post_process_hooks/client"; export const config = { @@ -472,6 +475,25 @@ async function handler( }); } + const statsDTags = [ + `data_source_id:${dataSource.id}`, + `workspace_id:${owner.sId}`, + `data_source_name:${dataSource.name}`, + ]; + if (!r.data.parents || r.data.parents.length === 0) { + statsDClient.increment( + "document_without_a_parent.count", + 1, + statsDTags + ); + } else if (r.data.parents[0] != req.query.documentId) { + statsDClient.increment( + "document_without_a_parent.count", + 1, + statsDTags + ); + } + if (r.data.async === true) { const enqueueRes = await enqueueUpsertDocument({ upsertDocument: {