From 02f6662a49ec6b64b262bb93b6f15b22458a611a Mon Sep 17 00:00:00 2001 From: Omar Abdelkader <omikader@gmail.com> Date: Thu, 16 Nov 2023 20:20:05 -0500 Subject: [PATCH] feat(apify): support Document[] return type for mapping function (#3262) * feat(apify): support Document[] for mapping function * misc: make it backwards compatible --- .../src/document_loaders/web/apify_dataset.ts | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/langchain/src/document_loaders/web/apify_dataset.ts b/langchain/src/document_loaders/web/apify_dataset.ts index b5884c888a8c..977bdd725b56 100644 --- a/langchain/src/document_loaders/web/apify_dataset.ts +++ b/langchain/src/document_loaders/web/apify_dataset.ts @@ -15,11 +15,17 @@ import { getEnvironmentVariable } from "../../util/env.js"; /** * A type that represents a function that takes a single object (an Apify * dataset item) and converts it to an instance of the Document class. + * + * Change function signature to only be asynchronous for simplicity in v0.1.0 + * https://github.com/langchain-ai/langchainjs/pull/3262 */ export type ApifyDatasetMappingFunction<Metadata extends Record<string, any>> = ( item: Record<string | number, unknown> - ) => Document<Metadata> | Promise<Document<Metadata>>; + ) => + | Document<Metadata> + | Array<Document<Metadata>> + | Promise<Document<Metadata> | Array<Document<Metadata>>>; export interface ApifyDatasetLoaderConfig<Metadata extends Record<string, any>> extends AsyncCallerParams { @@ -66,15 +72,17 @@ export class ApifyDatasetLoader<Metadata extends Record<string, any>> * @returns An array of Document instances. */ async load(): Promise<Document<Metadata>[]> { - const datasetItems = ( - await this.apifyClient.dataset(this.datasetId).listItems({ clean: true }) - ).items; + const dataset = await this.apifyClient + .dataset(this.datasetId) + .listItems({ clean: true }); - return await Promise.all( - datasetItems.map((item) => + const documentList = await Promise.all( + dataset.items.map((item) => this.caller.call(async () => this.datasetMappingFunction(item)) ) ); + + return documentList.flat(); } /**