Skip to content

Commit

Permalink
Extract markdown code blocks node
Browse files Browse the repository at this point in the history
  • Loading branch information
abrenneke committed Oct 12, 2023
1 parent 8167d27 commit 1d55f5c
Show file tree
Hide file tree
Showing 8 changed files with 251 additions and 1 deletion.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 2 additions & 0 deletions packages/app/src/hooks/useBuiltInNodeImages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ import loadDatasetNodeImage from '../assets/node_images/load_dataset_node.png';
import splitNodeImage from '../assets/node_images/split_node.png';
import getDatasetRowNodeImage from '../assets/node_images/get_dataset_row_node.png';
import sliceNodeImage from '../assets/node_images/slice_node.png';
import extractMarkdownCodeBlocksImage from '../assets/node_images/extract_markdown_code_blocks_node.png';

export const useBuiltInNodeImages = (): Record<BuiltInNodeType, string> => {
return {
Expand Down Expand Up @@ -127,5 +128,6 @@ export const useBuiltInNodeImages = (): Record<BuiltInNodeType, string> => {
split: splitNodeImage,
getDatasetRow: getDatasetRowNodeImage,
slice: sliceNodeImage,
extractMarkdownCodeBlocks: extractMarkdownCodeBlocksImage,
};
};
6 changes: 5 additions & 1 deletion packages/core/src/model/Nodes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,9 @@ export * from './nodes/GetDatasetRowNode.js';
import { sliceNode } from './nodes/SliceNode.js';
export * from './nodes/SliceNode.js';

import { extractMarkdownCodeBlocksNode } from './nodes/ExtractMarkdownCodeBlocksNode.js';
export * from './nodes/ExtractMarkdownCodeBlocksNode.js';

export const registerBuiltInNodes = (registry: NodeRegistration) => {
return registry
.register(toYamlNode)
Expand Down Expand Up @@ -251,7 +254,8 @@ export const registerBuiltInNodes = (registry: NodeRegistration) => {
.register(splitNode)
.register(datasetNearestNeighborsNode)
.register(getDatasetRowNode)
.register(sliceNode);
.register(sliceNode)
.register(extractMarkdownCodeBlocksNode);
};

let globalRivetNodeRegistry = registerBuiltInNodes(new NodeRegistration());
Expand Down
124 changes: 124 additions & 0 deletions packages/core/src/model/nodes/ExtractMarkdownCodeBlocksNode.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import {
type ChartNode,
type NodeId,
type NodeInputDefinition,
type PortId,
type NodeOutputDefinition,
} from '../NodeBase.js';
import { nanoid } from 'nanoid/non-secure';
import { NodeImpl, type NodeUIData } from '../NodeImpl.js';
import { nodeDefinition } from '../NodeDefinition.js';
import { expectType } from '../../utils/expectType.js';
import type { Inputs, Outputs } from '../GraphProcessor.js';

/**
 * Chart node whose `type` is `'extractMarkdownCodeBlocks'`.
 * Its data type is the empty object type `{}`: the node declares no data fields.
 */
export type ExtractMarkdownCodeBlocksNode = ChartNode<'extractMarkdownCodeBlocks', {}>;

/**
 * Node implementation that extracts fenced (triple-backtick) code blocks from a
 * Markdown string.
 *
 * It exposes one required `input` string port and three outputs: the first
 * block, every block, and the language tag written after each opening fence.
 */
export class ExtractMarkdownCodeBlocksNodeImpl extends NodeImpl<ExtractMarkdownCodeBlocksNode> {
  /**
   * Builds a fresh chart node with a new id, the default title, an origin
   * position, a width of 250, and empty data.
   */
  static create(): ExtractMarkdownCodeBlocksNode {
    const chartNode: ExtractMarkdownCodeBlocksNode = {
      type: 'extractMarkdownCodeBlocks',
      title: 'Extract Markdown Code Blocks',
      id: nanoid() as NodeId,
      visualData: {
        x: 0,
        y: 0,
        width: 250,
      },
      data: {},
    };

    return chartNode;
  }

  /** Declares the single required `input` port of type `string`. */
  getInputDefinitions(): NodeInputDefinition[] {
    return [
      {
        id: 'input' as PortId,
        title: 'Input',
        dataType: 'string',
        required: true,
      },
    ];
  }

  /** Declares the `firstBlock`, `allBlocks` and `languages` output ports. */
  getOutputDefinitions(): NodeOutputDefinition[] {
    return [
      {
        id: 'firstBlock' as PortId,
        title: 'First Block',
        dataType: 'string',
      },
      {
        id: 'allBlocks' as PortId,
        title: 'All Blocks',
        dataType: 'string[]',
      },
      {
        id: 'languages' as PortId,
        title: 'Languages',
        dataType: 'string[]',
      },
    ];
  }

  /** Returns the info box text, context menu title and group for this node. */
  static getUIData(): NodeUIData {
    return {
      infoBoxBody: `
Extracts the code blocks in the input Markdown text.
Outputs the first matched block, all matched blocks, and the languages specified for the blocks.
`,
      infoBoxTitle: 'Extract Markdown Code Blocks Node',
      contextMenuTitle: 'Extract Markdown Code Blocks',
      group: ['Text'],
    };
  }

  /**
   * Scans the `input` string for fenced code blocks and returns them.
   *
   * A block is matched as an opening triple backtick, an optional word-character
   * language tag, a newline, the block content, and the next closing triple
   * backtick.
   *
   * @param inputs - Port values; `input` is passed through `expectType` as a `'string'`.
   * @returns `firstBlock` (a `string`, or a `control-flow-excluded` value when no
   * block matched), `allBlocks` (every block's content, in order) and `languages`
   * (the tag for each block, `''` when the fence has none).
   */
  async process(inputs: Inputs): Promise<Outputs> {
    const inputString = expectType(inputs['input' as PortId], 'string');

    // A fresh global regex per call, so `lastIndex` always starts at 0.
    const regex = /```(\w*)\n([\s\S]*?)```/g;
    const allBlocks: string[] = [];
    const languages: string[] = [];

    let match: RegExpExecArray | null;
    while ((match = regex.exec(inputString)) !== null) {
      // Both groups always participate in a match; `??` only satisfies the type checker.
      languages.push(match[1] ?? '');
      allBlocks.push(match[2] ?? '');
    }

    // Take the first block by position rather than by truthiness: an empty first
    // block ('') is still the first block and must not be replaced by a later one.
    const firstBlock: string | undefined = allBlocks[0];

    return {
      ['firstBlock' as PortId]:
        firstBlock === undefined
          ? {
              type: 'control-flow-excluded',
              value: undefined,
            }
          : {
              type: 'string',
              value: firstBlock,
            },
      ['allBlocks' as PortId]: {
        type: 'string[]',
        value: allBlocks,
      },
      ['languages' as PortId]: {
        type: 'string[]',
        value: languages,
      },
    };
  }
}

/**
 * Node definition produced by `nodeDefinition` from the implementation class and
 * the label 'Extract Markdown Code Blocks'.
 * NOTE(review): the second argument is presumably the node's display name — confirm against `nodeDefinition`.
 */
export const extractMarkdownCodeBlocksNode = nodeDefinition(
ExtractMarkdownCodeBlocksNodeImpl,
'Extract Markdown Code Blocks',
);
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
119 changes: 119 additions & 0 deletions packages/docs/docs/node-reference/extract-markdown-code-blocks.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
---
id: extract-markdown-code-blocks
title: Extract Markdown Code Blocks Node
sidebar_label: Extract Markdown Code Blocks
---

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

![Extract Markdown Code Blocks Node Screenshot](./assets/extract-markdown-code-blocks-node.png)

## Overview

The Extract Markdown Code Blocks Node is used to extract code blocks from a Markdown text. It extracts all code blocks and the specified languages for each block. The node outputs the first matched block, all matched blocks, and the languages specified for the blocks.

A markdown code block is defined as a code block that is surrounded by three backticks on each side. For example:

````markdown
Here is some JavaScript code:

```javascript
console.log('Hello, world!');
```
````

This node is useful when working with LLMs that have been trained extensively on replying with markdown data.

<Tabs
defaultValue="inputs"
values={[
{label: 'Inputs', value: 'inputs'},
{label: 'Outputs', value: 'outputs'},
{label: 'Editor Settings', value: 'settings'},
]
}>

<TabItem value="inputs">

## Inputs

| Title | Data Type | Description | Default Value | Notes |
| ----- | --------- | --------------------------------------------------- | ------------- | ------------------------------------------------------------- |
| Input | `string` | The Markdown text from which to extract code blocks | (required) | The input will be coerced into a string if it is not a string |

</TabItem>

<TabItem value="outputs">

## Outputs

| Title | Data Type | Description | Notes |
| ----------- | ---------- | ------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------ |
| First Block | `string` | The first code block found in the input Markdown text | If no code block is found, this output will not be run |
| All Blocks | `string[]` | All code blocks found in the input Markdown text | If no code block is found, this output will be an empty array |
| Languages | `string[]` | The languages specified for each code block in the input Markdown text, in the order they appear | If no language is specified for a code block, the corresponding element in the array will be an empty string |

</TabItem>

<TabItem value="settings">

## Editor Settings

This node has no configurable editor settings.

</TabItem>

</Tabs>

## Example 1: Extract code blocks from a Markdown text

1. Create a [Text Node](./text.mdx) and set the text to the following Markdown text:

````markdown
Here is some JavaScript code:

```javascript
console.log('Hello, world!');
```

And here is some Python code:

```python
print('Hello, world!')
```

That's it!
````

2. Create an Extract Markdown Code Blocks Node and connect the Text Node to its `Input` input.
3. Run the graph. The `First Block` output of the Extract Markdown Code Blocks Node should be `console.log('Hello, world!');`, the `All Blocks` output should be an array containing `console.log('Hello, world!');` and `print('Hello, world!')`, and the `Languages` output should be an array containing `javascript` and `python`.

![Extract Markdown Code Blocks Node Example 1](./assets/extract-markdown-code-blocks-node-example-01.png)

## Error Handling

The Extract Markdown Code Blocks Node will not error under normal circumstances. If the input text does not contain any code blocks, the `First Block` output will not be run, and the `All Blocks` and `Languages` outputs will be empty arrays.

## FAQ

**Q: What happens if a code block does not specify a language?**

A: The corresponding element in the `Languages` output array will be an empty string.

**Q: What happens if the input text contains non-Markdown text?**

A: The node will ignore any non-Markdown text and only extract code blocks. If the input text does not contain any code blocks, the `First Block` output will not be run, and the `All Blocks` and `Languages` outputs will be empty arrays.

## See Also

- [Extract JSON Node](./extract-json.mdx)
- [Extract YAML Node](./extract-yaml.mdx)
- [Extract Object Path Node](./extract-object-path.mdx)
- [Extract with Regex Node](./extract-with-regex.mdx)
- [Text Node](./text.mdx)
- [Split Node](./split.mdx)

```
```
1 change: 1 addition & 0 deletions packages/docs/sidebars.js
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ const sidebars = {
collapsed: false,
items: [
'node-reference/chunk',
'node-reference/extract-markdown-code-blocks',
'node-reference/extract-with-regex',
'node-reference/join',
'node-reference/prompt',
Expand Down

0 comments on commit 1d55f5c

Please sign in to comment.