-
Notifications
You must be signed in to change notification settings - Fork 269
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
251 additions
and
1 deletion.
There are no files selected for viewing
Binary file added
BIN
+22.9 KB
packages/app/src/assets/node_images/extract_markdown_code_blocks_node.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
124 changes: 124 additions & 0 deletions
124
packages/core/src/model/nodes/ExtractMarkdownCodeBlocksNode.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
import { | ||
type ChartNode, | ||
type NodeId, | ||
type NodeInputDefinition, | ||
type PortId, | ||
type NodeOutputDefinition, | ||
} from '../NodeBase.js'; | ||
import { nanoid } from 'nanoid/non-secure'; | ||
import { NodeImpl, type NodeUIData } from '../NodeImpl.js'; | ||
import { nodeDefinition } from '../NodeDefinition.js'; | ||
import { expectType } from '../../utils/expectType.js'; | ||
import type { Inputs, Outputs } from '../GraphProcessor.js'; | ||
|
||
/**
 * Chart node type for the Extract Markdown Code Blocks node: tagged with the
 * `'extractMarkdownCodeBlocks'` type string and carrying an empty data object,
 * i.e. the node declares no data fields of its own.
 */
export type ExtractMarkdownCodeBlocksNode = ChartNode<'extractMarkdownCodeBlocks', {}>; | ||
|
||
/**
 * Node implementation that extracts fenced code blocks from Markdown text.
 *
 * A block is recognised when the text contains three backticks, an optional
 * language tag made of word characters (`\w*`), a newline, the block content,
 * and a closing run of three backticks. Fences whose info string contains
 * anything other than word characters (for example `c++`), or whose opening
 * fence is not immediately followed by `\n`, are not matched.
 */
export class ExtractMarkdownCodeBlocksNodeImpl extends NodeImpl<ExtractMarkdownCodeBlocksNode> {
  /**
   * Creates a new node with a freshly generated id, positioned at the origin,
   * with a default width of 250 and an empty data object.
   */
  static create(): ExtractMarkdownCodeBlocksNode {
    const chartNode: ExtractMarkdownCodeBlocksNode = {
      type: 'extractMarkdownCodeBlocks',
      title: 'Extract Markdown Code Blocks',
      id: nanoid() as NodeId,
      visualData: {
        x: 0,
        y: 0,
        width: 250,
      },
      data: {},
    };

    return chartNode;
  }

  /** Declares the single required `input` port, typed as `string`. */
  getInputDefinitions(): NodeInputDefinition[] {
    return [
      {
        id: 'input' as PortId,
        title: 'Input',
        dataType: 'string',
        required: true,
      },
    ];
  }

  /**
   * Declares the three output ports: `firstBlock` (`string`), `allBlocks`
   * (`string[]`) and `languages` (`string[]`).
   */
  getOutputDefinitions(): NodeOutputDefinition[] {
    return [
      {
        id: 'firstBlock' as PortId,
        title: 'First Block',
        dataType: 'string',
      },
      {
        id: 'allBlocks' as PortId,
        title: 'All Blocks',
        dataType: 'string[]',
      },
      {
        id: 'languages' as PortId,
        title: 'Languages',
        dataType: 'string[]',
      },
    ];
  }

  /** Returns the info-box text, context-menu title and group shown for this node in the UI. */
  static getUIData(): NodeUIData {
    return {
      infoBoxBody: `
        Extracts the code blocks in the input Markdown text.
        Outputs the first matched block, all matched blocks, and the languages specified for the blocks.
      `,
      infoBoxTitle: 'Extract Markdown Code Blocks Node',
      contextMenuTitle: 'Extract Markdown Code Blocks',
      group: ['Text'],
    };
  }

  /**
   * Scans the `input` string for fenced code blocks.
   *
   * @param inputs - Port values; `input` is read through `expectType(..., 'string')`.
   * @returns `allBlocks` and `languages` as parallel arrays in order of
   *   appearance (a block without a language tag contributes an empty string),
   *   and `firstBlock` as the first matched block. When no block is found,
   *   `firstBlock` is emitted as `control-flow-excluded` and both arrays are empty.
   */
  async process(inputs: Inputs): Promise<Outputs> {
    const inputString = expectType(inputs['input' as PortId], 'string');

    // Group 1: language tag (possibly empty). Group 2: block content, matched
    // lazily so that each block ends at the nearest closing fence.
    const regex = /```(\w*)\n([\s\S]*?)```/g;
    const allBlocks: string[] = [];
    const languages: string[] = [];

    let match: RegExpExecArray | null;
    while ((match = regex.exec(inputString)) !== null) {
      // Both groups always participate in a successful match; `?? ''` only
      // satisfies the type checker under `noUncheckedIndexedAccess`.
      languages.push(match[1] ?? '');
      allBlocks.push(match[2] ?? '');
    }

    // Take the first block by position rather than by truthiness: an empty
    // first block ('') is still the first block and must not be replaced by a
    // later, non-empty one.
    const firstBlock: string | undefined = allBlocks[0];

    return {
      ['firstBlock' as PortId]:
        firstBlock === undefined
          ? {
              type: 'control-flow-excluded',
              value: undefined,
            }
          : {
              type: 'string',
              value: firstBlock,
            },
      ['allBlocks' as PortId]: {
        type: 'string[]',
        value: allBlocks,
      },
      ['languages' as PortId]: {
        type: 'string[]',
        value: languages,
      },
    };
  }
}
|
||
/**
 * Node definition for the Extract Markdown Code Blocks node, built by passing
 * the implementation class and the display name 'Extract Markdown Code Blocks'
 * to `nodeDefinition`.
 * NOTE(review): presumably this export is what gets registered with the node
 * registry — confirm against `nodeDefinition` and its callers.
 */
export const extractMarkdownCodeBlocksNode = nodeDefinition( | ||
ExtractMarkdownCodeBlocksNodeImpl, | ||
'Extract Markdown Code Blocks', | ||
); |
Binary file added
BIN
+72.1 KB
...ocs/docs/node-reference/assets/extract-markdown-code-blocks-node-example-01.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+22.9 KB
packages/docs/docs/node-reference/assets/extract-markdown-code-blocks-node.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
119 changes: 119 additions & 0 deletions
119
packages/docs/docs/node-reference/extract-markdown-code-blocks.mdx
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
--- | ||
id: extract-markdown-code-blocks | ||
title: Extract Markdown Code Blocks Node | ||
sidebar_label: Extract Markdown Code Blocks | ||
--- | ||
|
||
import Tabs from '@theme/Tabs'; | ||
import TabItem from '@theme/TabItem'; | ||
|
||
![Extract Markdown Code Blocks Node Screenshot](./assets/extract-markdown-code-blocks-node.png) | ||
|
||
## Overview | ||
|
||
The Extract Markdown Code Blocks Node is used to extract code blocks from a Markdown text. It extracts all code blocks and the specified languages for each block. The node outputs the first matched block, all matched blocks, and the languages specified for the blocks. | ||
|
||
A markdown code block is defined as a code block that is surrounded by three backticks on each side. For example: | ||
|
||
````markdown | ||
Here is some JavaScript code: | ||
|
||
```javascript | ||
console.log('Hello, world!'); | ||
``` | ||
```` | ||
|
||
This node is useful when working with LLMs that have been trained extensively on replying with markdown data. | ||
|
||
<Tabs | ||
defaultValue="inputs" | ||
values={[ | ||
{label: 'Inputs', value: 'inputs'}, | ||
{label: 'Outputs', value: 'outputs'}, | ||
{label: 'Editor Settings', value: 'settings'}, | ||
] | ||
}> | ||
|
||
<TabItem value="inputs"> | ||
|
||
## Inputs | ||
|
||
| Title | Data Type | Description | Default Value | Notes | | ||
| ----- | --------- | --------------------------------------------------- | ------------- | ------------------------------------------------------------- | | ||
| Input | `string` | The Markdown text from which to extract code blocks | (required) | The input will be coerced into a string if it is not a string | | ||
|
||
</TabItem> | ||
|
||
<TabItem value="outputs"> | ||
|
||
## Outputs | ||
|
||
| Title | Data Type | Description | Notes | | ||
| ----------- | ---------- | ------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------ | | ||
| First Block | `string` | The first code block found in the input Markdown text | If no code block is found, this output will not be run | | ||
| All Blocks | `string[]` | All code blocks found in the input Markdown text | If no code block is found, this output will be an empty array | | ||
| Languages | `string[]` | The languages specified for each code block in the input Markdown text, in the order they appear | If no language is specified for a code block, the corresponding element in the array will be an empty string | | ||
|
||
</TabItem> | ||
|
||
<TabItem value="settings"> | ||
|
||
## Editor Settings | ||
|
||
This node has no configurable editor settings. | ||
|
||
</TabItem> | ||
|
||
</Tabs> | ||
|
||
## Example 1: Extract code blocks from a Markdown text | ||
|
||
1. Create a [Text Node](./text.mdx) and set the text to the following Markdown text: | ||
|
||
````markdown | ||
Here is some JavaScript code: | ||
|
||
```javascript | ||
console.log('Hello, world!'); | ||
``` | ||
|
||
And here is some Python code: | ||
|
||
```python | ||
print('Hello, world!') | ||
``` | ||
|
||
That's it! | ||
```` | ||
|
||
2. Create an Extract Markdown Code Blocks Node and connect the Text Node to its `Input` input. | ||
3. Run the graph. The `First Block` output of the Extract Markdown Code Blocks Node should be `console.log('Hello, world!');`, the `All Blocks` output should be an array containing `console.log('Hello, world!');` and `print('Hello, world!')`, and the `Languages` output should be an array containing `javascript` and `python`. | ||
|
||
![Extract Markdown Code Blocks Node Example 1](./assets/extract-markdown-code-blocks-node-example-01.png) | ||
|
||
## Error Handling | ||
|
||
The Extract Markdown Code Blocks Node will not error under normal circumstances. If the input text does not contain any code blocks, the `First Block` output will not be run, and the `All Blocks` and `Languages` outputs will be empty arrays. | ||
|
||
## FAQ | ||
|
||
**Q: What happens if a code block does not specify a language?** | ||
|
||
A: The corresponding element in the `Languages` output array will be an empty string. | ||
|
||
**Q: What happens if the input text contains non-Markdown text?** | ||
|
||
A: The node will ignore any non-Markdown text and only extract code blocks. If the input text does not contain any code blocks, the `First Block` output will not be run, and the `All Blocks` and `Languages` outputs will be empty arrays. | ||
|
||
## See Also | ||
|
||
- [Extract JSON Node](./extract-json.mdx) | ||
- [Extract YAML Node](./extract-yaml.mdx) | ||
- [Extract Object Path Node](./extract-object-path.mdx) | ||
- [Extract with Regex Node](./extract-with-regex.mdx) | ||
- [Text Node](./text.mdx) | ||
- [Split Node](./split.mdx) | ||
|
||
``` | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters