Skip to content

Commit

Permalink
Merge pull request FlowiseAI#1424 from FlowiseAI/feature/S3
Browse files: browse the repository at this point in the history
Feature/update S3 loader
  • Loading branch information
HenryHengZJ authored Dec 22, 2023
2 parents ff74920 + 1a4ead3 commit 177d1ae
Showing 1 changed file with 68 additions and 9 deletions.
77 changes: 68 additions & 9 deletions packages/components/nodes/documentloaders/S3File/S3File.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class S3_DocumentLoaders implements INode {
constructor() {
this.label = 'S3'
this.name = 'S3'
this.version = 1.0
this.version = 2.0
this.type = 'Document'
this.icon = 's3.svg'
this.category = 'Document Loaders'
Expand Down Expand Up @@ -113,12 +113,62 @@ class S3_DocumentLoaders implements INode {
optional: true
},
{
label: 'NarrativeText Only',
name: 'narrativeTextOnly',
label: 'Element Type',
name: 'elementType',
description:
'Only load documents with NarrativeText metadata from Unstructured. See how Unstructured partition data <a target="_blank" href="https://unstructured-io.github.io/unstructured/bricks/partition.html#">here</a>',
default: true,
type: 'boolean',
'Unstructured partition document into different types, select the types to return. If not selected, all types will be returned',
type: 'multiOptions',
options: [
{
label: 'FigureCaption',
name: 'FigureCaption'
},
{
label: 'NarrativeText',
name: 'NarrativeText'
},
{
label: 'ListItem',
name: 'ListItem'
},
{
label: 'Title',
name: 'Title'
},
{
label: 'Address',
name: 'Address'
},
{
label: 'Table',
name: 'Table'
},
{
label: 'PageBreak',
name: 'PageBreak'
},
{
label: 'Header',
name: 'Header'
},
{
label: 'Footer',
name: 'Footer'
},
{
label: 'UncategorizedText',
name: 'UncategorizedText'
},
{
label: 'Image',
name: 'Image'
},
{
label: 'Formula',
name: 'Formula'
}
],
default: [],
optional: true,
additionalParams: true
},
Expand All @@ -138,7 +188,7 @@ class S3_DocumentLoaders implements INode {
const unstructuredAPIUrl = nodeData.inputs?.unstructuredAPIUrl as string
const unstructuredAPIKey = nodeData.inputs?.unstructuredAPIKey as string
const metadata = nodeData.inputs?.metadata
const narrativeTextOnly = nodeData.inputs?.narrativeTextOnly as boolean
const elementType = nodeData.inputs?.elementType as string

const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const accessKeyId = getCredentialParam('awsKey', credentialData, nodeData)
Expand Down Expand Up @@ -169,6 +219,15 @@ class S3_DocumentLoaders implements INode {
}
}

let elementTypes: string[] = []
if (elementType) {
try {
elementTypes = JSON.parse(elementType)
} catch (e) {
elementTypes = []
}
}

loader.load = async () => {
const tempDir = fsDefault.mkdtempSync(path.join(os.tmpdir(), 's3fileloader-'))

Expand Down Expand Up @@ -235,10 +294,10 @@ class S3_DocumentLoaders implements INode {
}
}
})
return narrativeTextOnly ? finaldocs.filter((doc) => doc.metadata.category === 'NarrativeText') : finaldocs
return elementTypes.length ? finaldocs.filter((doc) => elementTypes.includes(doc.metadata.category)) : finaldocs
}

return narrativeTextOnly ? docs.filter((doc) => doc.metadata.category === 'NarrativeText') : docs
return elementTypes.length ? docs.filter((doc) => elementTypes.includes(doc.metadata.category)) : docs
}
}
module.exports = { nodeClass: S3_DocumentLoaders }

0 comments on commit 177d1ae

Please sign in to comment.