From 02f6662a49ec6b64b262bb93b6f15b22458a611a Mon Sep 17 00:00:00 2001
From: Omar Abdelkader <omikader@gmail.com>
Date: Thu, 16 Nov 2023 20:20:05 -0500
Subject: [PATCH] feat(apify): support Document[] return type for mapping
 function (#3262)

* feat(apify): support Document[] for mapping function

* misc: make it backwards compatible
---
 .../src/document_loaders/web/apify_dataset.ts | 20 +++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/langchain/src/document_loaders/web/apify_dataset.ts b/langchain/src/document_loaders/web/apify_dataset.ts
index b5884c888a8c..977bdd725b56 100644
--- a/langchain/src/document_loaders/web/apify_dataset.ts
+++ b/langchain/src/document_loaders/web/apify_dataset.ts
@@ -15,11 +15,17 @@ import { getEnvironmentVariable } from "../../util/env.js";
 /**
  * A type that represents a function that takes a single object (an Apify
  * dataset item) and converts it to an instance of the Document class.
+ *
+ * TODO: in v0.1.0, simplify this signature so it is asynchronous only. See:
+ * https://github.com/langchain-ai/langchainjs/pull/3262
  */
 export type ApifyDatasetMappingFunction<Metadata extends Record<string, any>> =
   (
     item: Record<string | number, unknown>
-  ) => Document<Metadata> | Promise<Document<Metadata>>;
+  ) =>
+    | Document<Metadata>
+    | Array<Document<Metadata>>
+    | Promise<Document<Metadata> | Array<Document<Metadata>>>;
 
 export interface ApifyDatasetLoaderConfig<Metadata extends Record<string, any>>
   extends AsyncCallerParams {
@@ -66,15 +72,17 @@ export class ApifyDatasetLoader<Metadata extends Record<string, any>>
    * @returns An array of Document instances.
    */
   async load(): Promise<Document<Metadata>[]> {
-    const datasetItems = (
-      await this.apifyClient.dataset(this.datasetId).listItems({ clean: true })
-    ).items;
+    const dataset = await this.apifyClient
+      .dataset(this.datasetId)
+      .listItems({ clean: true });
 
-    return await Promise.all(
-      datasetItems.map((item) =>
+    const documentList = await Promise.all(
+      dataset.items.map((item) =>
         this.caller.call(async () => this.datasetMappingFunction(item))
       )
     );
+
+    return documentList.flat();
   }
 
   /**