Skip to content

Commit

Permalink
Log empty or invalid parents array for documents (#9011)
Browse files Browse the repository at this point in the history
* 🔊 add logging in core to detect empty parents or parents not self-refed

* 🔊 add logging in front
  • Loading branch information
aubin-tchoi authored Nov 28, 2024
1 parent 4b8a9bf commit 01fee61
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 5 deletions.
18 changes: 17 additions & 1 deletion core/src/data_sources/data_source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use std::collections::HashMap;
use std::fmt;
use std::str::FromStr;
use tokio_stream::{self as stream};
use tracing::{error, info};
use tracing::{error, info, warn};
use uuid::Uuid;

/// Section is used to represent the structure of a document to be taken into account during chunking.
Expand Down Expand Up @@ -604,6 +604,22 @@ impl DataSource {
))?;
}

// Diagnostic check on `parents`: each branch below only emits a `warn!` event; neither
// returns early nor modifies `parents`.
if parents.is_empty() {
// Case 1: no parent at all was supplied for this document.
warn!(
document_id = document_id,
// The `?` sigil records the field using its `Debug` representation.
timestamp = ?timestamp,
parents = ?parents,
"Upserting a document without any parent"
);
} else if parents[0] != document_id {
// Case 2: `parents` is non-empty here (the `is_empty` branch was not taken), so indexing
// `parents[0]` cannot panic. The first entry differs from the document's own id.
warn!(
document_id = document_id,
timestamp = ?timestamp,
parents = ?parents,
"Upserting a document that is not self-referenced as its parent"
);
}

let store = store.clone();

let current_system_tags = if preserve_system_tags {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@ import type {
} from "@dust-tt/client";
import { PostDataSourceDocumentRequestSchema } from "@dust-tt/client";
import type { WithAPIErrorResponse } from "@dust-tt/types";
import { rateLimiter, sectionFullText } from "@dust-tt/types";
import { dustManagedCredentials } from "@dust-tt/types";
import { CoreAPI } from "@dust-tt/types";
import {
CoreAPI,
dustManagedCredentials,
rateLimiter,
sectionFullText,
} from "@dust-tt/types";
import type { NextApiRequest, NextApiResponse } from "next";

import { withPublicAPIAuthentication } from "@app/lib/api/auth_wrappers";
Expand All @@ -22,7 +25,7 @@ import {
} from "@app/lib/upsert_queue";
import { validateUrl } from "@app/lib/utils";
import logger from "@app/logger/logger";
import { apiError } from "@app/logger/withlogging";
import { apiError, statsDClient } from "@app/logger/withlogging";
import { launchRunPostDeleteHooksWorkflow } from "@app/temporal/documents_post_process_hooks/client";

export const config = {
Expand Down Expand Up @@ -472,6 +475,25 @@ async function handler(
});
}

// Tags attached to the metric increments below: data source id, workspace sId and
// data source name.
const statsDTags = [
`data_source_id:${dataSource.id}`,
`workspace_id:${owner.sId}`,
`data_source_name:${dataSource.name}`,
];
// Count requests whose `parents` is missing/empty, or whose first entry is not the
// document id taken from the request query. Only metrics are emitted here; the request
// is not rejected by this check.
if (!r.data.parents || r.data.parents.length === 0) {
// Case 1: `parents` is absent or an empty array.
statsDClient.increment(
"document_without_a_parent.count",
1,
statsDTags
);
} else if (r.data.parents[0] != req.query.documentId) {
// Case 2: `parents` is non-empty but its first entry differs from the queried document id.
// NOTE(review): this increments the same metric name as the branch above, so the two
// cases cannot be distinguished from the metric alone — confirm whether a distinct name
// was intended for the "not self-referenced" case.
// NOTE(review): the comparison uses loose `!=`; presumably `req.query.documentId` is a
// plain string here — confirm, and consider `!==` if so.
statsDClient.increment(
"document_without_a_parent.count",
1,
statsDTags
);
}

if (r.data.async === true) {
const enqueueRes = await enqueueUpsertDocument({
upsertDocument: {
Expand Down

0 comments on commit 01fee61

Please sign in to comment.