Skip to content

Commit

Permalink
[JIT Datasources] Restrict data source block access to conversations …
Browse files Browse the repository at this point in the history
…space II (#8849)

* [JIT Datasources] Restrict data source block access to conversations space II

Description
---
Fixes dust-tt/tasks#1658 (again)

Following #8815 , @seb identified an issue described [here](https://dust4ai.slack.com/archives/C080VUG3PQR/p1732275137389959?thread_ts=1732271409.175599&cid=C080VUG3PQR)

A new approach is taken: pass a flag to core indicating if the call is system. Core can then ask the registry to allow or not datasources from the "conversations" space.

- for the front v1 app/runs -> core call, adding the flag as a dust header. It's immutably decided by the nature of the API key (and feels more secure than leaving it open to callers)
- for the lookup call, adding as a param to the API query, it seems like a parameter callers from core might want to set / unset

Risk
---
none, not yet used in prod (right @seb?)

Deploy
---
front
core

* renaming 🍿

* renaming 2 🍿

* renaming 3 🍿
  • Loading branch information
philipperolet authored Nov 22, 2024
1 parent fe92e6a commit 989a954
Show file tree
Hide file tree
Showing 7 changed files with 78 additions and 29 deletions.
22 changes: 22 additions & 0 deletions core/bin/dust_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -806,6 +806,17 @@ async fn runs_create(
None => (),
};

// If the run is made by a system key, it's a system run
match headers.get("X-Dust-IsSystemRun") {
Some(v) => match v.to_str() {
Ok(v) => {
credentials.insert("DUST_IS_SYSTEM_RUN".to_string(), v.to_string());
}
_ => (),
},
None => (),
};

match run_helper(project_id, payload.clone(), state.clone()).await {
Ok(app) => {
// The run is empty for now, we can clone it for the response.
Expand Down Expand Up @@ -862,6 +873,17 @@ async fn runs_create_stream(
None => (),
};

// If the run is made by a system key, it's a system run
match headers.get("X-Dust-IsSystemRun") {
Some(v) => match v.to_str() {
Ok(v) => {
credentials.insert("DUST_IS_SYSTEM_RUN".to_string(), v.to_string());
}
_ => (),
},
None => (),
};

// create unbounded channel to pass as stream to Sse::new
let (tx, mut rx) = unbounded_channel::<Value>();

Expand Down
4 changes: 4 additions & 0 deletions core/src/blocks/data_source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,15 @@ impl DataSource {
filter: Option<SearchFilter>,
target_document_tokens: Option<usize>,
) -> Result<Vec<Document>> {
let is_system_run =
env.credentials.get("DUST_IS_SYSTEM_RUN") == Some(&String::from("true"));

let (data_source_project, view_filter, data_source_id) =
get_data_source_project_and_view_filter(
&workspace_id,
&data_source_or_data_source_view_id,
env,
is_system_run,
)
.await?;

Expand Down
9 changes: 8 additions & 1 deletion core/src/blocks/database_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,17 @@ pub async fn load_tables_from_identifiers(
.unique()
.collect::<Vec<_>>();

let is_system_run = env.credentials.get("DUST_IS_SYSTEM_RUN") == Some(&String::from("true"));

// Get a vec of the corresponding project ids for each (workspace_id, data_source_id) pair.
let project_ids_view_filters = try_join_all(data_source_identifiers.iter().map(
|(workspace_id, data_source_or_view_id)| {
get_data_source_project_and_view_filter(workspace_id, data_source_or_view_id, env)
get_data_source_project_and_view_filter(
workspace_id,
data_source_or_view_id,
env,
is_system_run,
)
},
))
.await?;
Expand Down
7 changes: 6 additions & 1 deletion core/src/blocks/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ pub async fn get_data_source_project_and_view_filter(
workspace_id: &String,
data_source_id: &String,
env: &Env,
// true for our packaged dust-apps called internally, see
// https://github.com/dust-tt/tasks/issues/1658
is_system_run: bool,
) -> Result<(Project, Option<SearchFilter>, String)> {
let dust_workspace_id = match env.credentials.get("DUST_WORKSPACE_ID") {
None => Err(anyhow!(
Expand Down Expand Up @@ -47,11 +50,13 @@ pub async fn get_data_source_project_and_view_filter(
};

let url = format!(
"{}/api/registry/data_sources/lookup?workspace_id={}&data_source_id={}",
"{}/api/registry/data_sources/lookup?workspace_id={}&data_source_id={}&is_system_run={}",
front_api.as_str(),
encode(&workspace_id),
encode(&data_source_id),
is_system_run.to_string(),
);

let parsed_url = Url::parse(url.as_str())?;

let res = reqwest::Client::new()
Expand Down
59 changes: 32 additions & 27 deletions front/pages/api/registry/[type]/lookup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,15 @@ async function handler(

const dustGroupIds = rawDustGroupIds.split(",");

// by default, data sources from the "conversations" space are not allowed
// except for our packaged dust-apps called internally, see
// https://github.com/dust-tt/tasks/issues/1658 in particular
// "assistant-retrieval-v2" that needs access to the conversation space we
// determine that we are on packaged apps by checking whether this is a system
// run

const allowConversationsDataSources = req.query.is_system_run === "true";

switch (req.method) {
case "GET":
switch (req.query.type) {
Expand Down Expand Up @@ -111,7 +120,8 @@ async function handler(
) {
const dataSourceViewRes = await handleDataSourceView(
auth,
dataSourceOrDataSourceViewId
dataSourceOrDataSourceViewId,
allowConversationsDataSources
);
if (dataSourceViewRes.isErr()) {
logger.info(
Expand All @@ -131,7 +141,8 @@ async function handler(
} else {
const dataSourceRes = await handleDataSource(
auth,
dataSourceOrDataSourceViewId
dataSourceOrDataSourceViewId,
allowConversationsDataSources
);
if (dataSourceRes.isErr()) {
logger.info(
Expand Down Expand Up @@ -174,25 +185,19 @@ export default withLogging(handler);

async function handleDataSourceView(
auth: Authenticator,
dataSourceViewId: string
dataSourceViewId: string,
allowConversationsDataSources: boolean
): Promise<Result<LookupDataSourceResponseBody, Error>> {
const dataSourceView = await DataSourceViewResource.fetchById(
auth,
dataSourceViewId
);

// This check is meant to block access to "conversations" space through a
// datasource block in a dust app, which could lead to data leaks, see related PR
// https://github.com/dust-tt/dust/pull/8815
// Only case in which this is allowed is for our packaged apps, via a system
// key, in particular "assistant-retrieval-v2" that needs access to the
// conversation space
const forbiddenAccessToConversations =
dataSourceView?.space?.kind === "conversations" &&
!auth.isSystemKey() &&
false; // Check disabled as it doesn't work as expected

if (!dataSourceView || forbiddenAccessToConversations) {
if (
!dataSourceView ||
(!allowConversationsDataSources &&
dataSourceView.space?.kind === "conversations")
) {
return new Err(new Error("Data source view not found."));
}

Expand All @@ -218,7 +223,8 @@ async function handleDataSourceView(

async function handleDataSource(
auth: Authenticator,
dataSourceId: string
dataSourceId: string,
allowConversationsDataSources: boolean
): Promise<Result<LookupDataSourceResponseBody, Error>> {
logger.info(
{
Expand All @@ -240,16 +246,11 @@ async function handleDataSource(
{ origin: "registry_lookup" }
);

// This check is meant to block access to "conversations" space through a
// datasource block in a dust app, which could lead to data leaks, see related PR
// https://github.com/dust-tt/dust/pull/8815
// Only case in which this is allowed is for our packaged apps, via a system
// key, in particular "assistant-retrieval-v2" that needs access to the
// conversation space
const forbiddenAccessToConversations =
dataSource?.space?.kind === "conversations" && !auth.isSystemKey() && false; // Check disabled as it doesn't work as expected

if (!dataSource || forbiddenAccessToConversations) {
if (
!dataSource ||
(!allowConversationsDataSources &&
dataSource.space?.kind === "conversations")
) {
return new Err(new Error("Data source not found."));
}

Expand All @@ -264,7 +265,11 @@ async function handleDataSource(
globalSpace
);

return handleDataSourceView(auth, dataSourceView[0].sId);
return handleDataSourceView(
auth,
dataSourceView[0].sId,
allowConversationsDataSources
);
}

if (dataSource.canRead(auth)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ async function handler(
inputs,
credentials,
secrets,
isSystemKey: auth.isSystemKey(),
}
);

Expand Down
5 changes: 5 additions & 0 deletions types/src/front/lib/core_api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ type CoreAPICreateRunParams = {
config: RunConfig;
credentials: CredentialsType;
secrets: DustAppSecretType[];
isSystemKey?: boolean;
};

type GetDatasetResponse = {
Expand Down Expand Up @@ -300,6 +301,7 @@ export class CoreAPI {
config,
credentials,
secrets,
isSystemKey,
}: CoreAPICreateRunParams
): Promise<CoreAPIResponse<{ run: CoreAPIRun }>> {
const response = await this._fetchWithError(
Expand All @@ -310,6 +312,7 @@ export class CoreAPI {
"Content-Type": "application/json",
"X-Dust-Workspace-Id": workspace.sId,
"X-Dust-Group-Ids": groups.map((g) => g.sId).join(","),
"X-Dust-IsSystemRun": isSystemKey ? "true" : "false",
},
body: JSON.stringify({
run_type: runType,
Expand Down Expand Up @@ -340,6 +343,7 @@ export class CoreAPI {
config,
credentials,
secrets,
isSystemKey,
}: CoreAPICreateRunParams
): Promise<
CoreAPIResponse<{
Expand All @@ -355,6 +359,7 @@ export class CoreAPI {
"Content-Type": "application/json",
"X-Dust-Workspace-Id": workspace.sId,
"X-Dust-Group-Ids": groups.map((g) => g.sId).join(","),
"X-Dust-IsSystemRun": isSystemKey ? "true" : "false",
},
body: JSON.stringify({
run_type: runType,
Expand Down

0 comments on commit 989a954

Please sign in to comment.