diff --git a/docs/index.rst b/docs/index.rst index d75e977..ae882a1 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -105,7 +105,8 @@ Graphbook is completely free and open source and can be deployed anywhere. We of .. toctree:: :caption: Docs - :maxdepth: 2 + :titlesonly: + :maxdepth: 3 :hidden: installing diff --git a/docs/learn/basics.rst b/docs/learn/basics.rst new file mode 100644 index 0000000..435bf35 --- /dev/null +++ b/docs/learn/basics.rst @@ -0,0 +1,569 @@ +.. meta:: + :description: Discover the basics. Learn how to create your first workflow in Graphbook. + :twitter:description: Discover the basics. Learn how to create your first workflow in Graphbook. + +.. _Basics: + +Basics +###### + +If you haven't already, make sure to install Graphbook by following the installation :ref:`guide`. +Once Graphbook is installed, execute ``graphbook`` in a directory that you want to create your workflows in. +Also, because workflow files are serialized as **.json** files, it is recommended to track your workflows and custom nodes in a version control system like Git. + +.. code-block:: console + + $ mkdir my_graphbook_project + $ cd my_graphbook_project + $ git init + $ graphbook + +Executing ``graphbook`` should automatically create a directory called `workflow` containing two subdirectories called `custom_nodes` and `docs` like so: + +:: + + my_graphbook_project + └── workflow + ├── custom_nodes + └── docs + +All of your custom nodes should be located inside of `workflow/custom_nodes`. +Graphbook is tracking that directory for any files ending with **.py** and will automatically detect classes that inherit from **Step** or **Resource** and functions defined and decorated with **@step** or **@resource**. + +Workflows +========= + +Keep in mind that when working with Graphbook, the development cycle in building a workflow can be illustrated in a few simple steps: + +#. **Build in Python** + + Write processing nodes using Python in your favorite code editor + +#. **Assemble in Graphbook** + + Assemble an ML workflow in the Graphbook web UI with your own processing nodes + +#. **Execute** + + Run, monitor, and adjust parameters in your workflow + +Go into the Graphbook UI (http://localhost:8005), and create a new workflow by adding a new **.json** file. + +.. warning:: + + Do not try to create the .json file outside of the web UI. + Graphbook needs the .json file to be structured specifically to properly serialize the graph and will create the file with such structure if you create it through the UI. + +This is where you can create your workflows. +When you modify you workflow, the changes are automatically saved to the **.json** file. +It is recommended to regularly commit the **.json** file to your version control system. + +Steps +===== + +Inside of your custom nodes directory, create a new Python file called `my_first_nodes.py`, and create the below step inside of it: + +.. tab-set:: + + .. tab-item:: function (recommended) + + .. code-block:: python + :caption: custom_nodes/my_first_nodes.py + + from graphbook import Note, step, param, output, event + import random + + @step("MyStep") + def my_first_step(ctx, note: Note): + note["message"] = "Hello, World!" + + .. tab-item:: class + + .. 
code-block:: python + :caption: custom_nodes/my_first_nodes.py + + from graphbook.steps import Step + from graphbook import Note + import random + + class MyStep(Step): + RequiresInput = True + Parameters = {} + Outputs = ["out"] + Category = "" + def __init__(self, prob): + super().__init__() + self.prob = prob + + def on_note(self, note: Note): + note["message"] = "Hello, World!" + +In the above, we did the following: + +#. We named our step "MyStep" +#. We defined a method called ``my_first_step`` which is called upon receiving a note. This method simply adds a key called "message" to the note with the value "Hello, World!". + +If you're building steps the recommended way, you can observe that the step also has a context ``ctx``. +This is essentially the ``self`` object (the underlying class instance) since all steps are just classes that inherit from the base class :class:`graphbook.steps.Step`. +With decorators, you are actually creating a new Step class with guardrails to prevent you from making common mistakes. + +You can provide implementations for any of the methods/events listed in :class:`graphbook.steps.Step`. + +You can add this step to your workflow by right clicking the pane, and add a new Step node and select `MyStep` from the dropdown (Add Step > MyStep). +Notice how your inputs, parameters, and outputs are automatically populated. + + +Load Data +========= + +Source steps are steps that generate data. +They are the starting points of your workflow. +You can create a source step by using the :func:`graphbook.source` decorator or by inheriting from the class :class:`graphbook.steps.GeneratorSourceStep`. + +.. tab-set:: + + .. tab-item:: function (recommended) + + .. code-block:: python + :caption: custom_nodes/my_first_source.py + + from graphbook import Note, step, source + import json + + @step("MySource") + @source() + def my_first_source(ctx): + with open("path/to/data.json") as f: + data = json.load(f) + for item in data: + yield Note(item) + + .. tab-item:: class + + .. code-block:: python + :caption: custom_nodes/my_first_source.py + + from graphbook.steps import GeneratorSourceStep + from graphbook import Note + + class MySource(GeneratorSourceStep): + RequiresInput = False + Parameters = {} + Outputs = ["out"] + Category = "" + def __init__(self): + super().__init__() + + def load(self): + with open("path/to/data.json") as f: + data = json.load(f) + for item in data: + yield Note(item) + +.. seealso:: + + :ref:`Load Images` for more advanced topics on loading images. + +Parameters +========== + +Parameters are configurable options to nodes which appear on the node in the web UI and can be changed by the user. +You can add parameters to your steps by using the :func:`graphbook.param` decorator or by adding to the dictionary called ``Parameters`` in the class-based nodes. +There exists a number of parameter types such as "string", "number", "boolean". + +.. seealso:: + + A list of :ref:`Available Parameters`. + +Below shows an example for string and number parameters. +Multiple parameters can be used at the same time: + +.. tab-set:: + + .. tab-item:: function (recommended) + + .. code-block:: python + :caption: custom_nodes/my_steps.py + + from graphbook import Note, step, param + + @step("MyStep") + @param("message", type="string", default="Hello, World!") + @param("offset", type="number", default=0) + def my_step(ctx, note: Note): + my_message = ctx.message + my_offset = ctx.offset + + .. tab-item:: class + + .. 
code-block:: python + :caption: custom_nodes/my_steps.py + + from graphbook.steps import Step + from graphbook import Note + + class MyStep(Step): + RequiresInput = True + Parameters = { + "message": { + "type": "string", + "default": "Hello, World!" + }, + "offset": { + "type": "number", + "default": 0 + } + } + Outputs = ["out"] + Category = "" + def __init__(self, message, offset): + super().__init__() + self.message = message + self.offset = offset + +When using decorators, you don't need to manually assign them to the context ``ctx``. + +Casting +******* + +When you use the parameter in the function, you can cast it to a specific type. +Sometimes, you want to cast the parameter to a specific type or pass it into a custom function before Graphbook makes the assignment to the context. + +.. code-block:: python + + @step("MyStep") + @param("dimension", type="number", default=0, cast_as=int) + def my_step(ctx, note: Note): + mean = note["tensor"].mean(dim=ctx.dimension) + +If your parameter is of type "function", you don't need to cast it when using decorators. +The Python function automatically gets interpreted using :func:`graphbook.utils.transform_function_string`. + +.. code-block:: python + + @step("MyStep") + @param("custom_fn", type="function") + def my_step(ctx, note: Note): + ctx.custom_fn(note["value"]) + +Outputs +======= + +Steps can have multiple outputs for routing notes to different steps or branches of the graph. +By default, a step has one output slot named "out". +You can add more output slots by using the :func:`graphbook.output` decorator or by adding to the list called ``Outputs`` in the class-based nodes. +Then, you may route a note based on overriding the method :meth:`graphbook.steps.Step.forward_note`. + +.. tab-set:: + + .. tab-item:: function (recommended) + + .. code-block:: python + :caption: custom_nodes/my_steps.py + + from graphbook import Note, step, output + + @step("MyStep") + @output("good", "junk") + @param("threshold", type="number", default=0.5) + def my_step(ctx, note: Note): + if note['value'] > ctx.threshold: + return "good" + return "junk" + + .. tab-item:: class + + .. code-block:: python + :caption: custom_nodes/my_steps.py + + from graphbook.steps import Step + from graphbook import Note + + class MyStep(Step): + RequiresInput = True + Parameters = { + "threshold": { + "type": "number", + "default": 0.5 + } + } + Outputs = ["good", "junk"] + Category = "" + def __init__(self, threshold): + super().__init__() + self.threshold = threshold + + def forward_note(self, note: Note) -> str: + if note['value'] > self.threshold: + return "good" + return "junk" + +.. seealso:: + + :ref:`Filter` for more advanced topics on outputs. + +Events +====== + +Events are methods that are called at specific points in the lifecycle of a step. +You can add events to your steps by using the :func:`graphbook.event` decorator or by just overriding the base class methods. +The event that is decorated by default is the method :meth:`graphbook.steps.Step.on_note`, but this is different depending on the type of step that is inherited. +For example, batch steps (:class:`graphbook.steps.BatchStep`) will override :meth:`graphbook.steps.BatchStep.on_item_batch` by default. +Using :func:`graphbook.event` is an easy way to override a method. + +.. tab-set:: + + .. tab-item:: function (recommended) + + .. 
code-block:: python + :caption: custom_nodes/my_steps.py + + from graphbook import Note, step, event + + def forward_note(ctx, note: Note) -> str: + if note['value'] > 0.5: + return "good" + return "junk" + + @step("MyStep") + @event("forward_note", forward_note) + def my_step(ctx, note: Note): # on_note + ctx.log(note) + + .. tab-item:: class + + .. code-block:: python + :caption: custom_nodes/my_steps.py + + from graphbook.steps import Step + from graphbook import Note + + class MyStep(Step): + RequiresInput = True + Parameters = {} + Outputs = ["good", "junk"] + Category = "" + def __init__(self): + super().__init__() + + def on_note(self, note: Note): + self.log(note) + + def forward_note(self, note: Note) -> str: + if note['value'] > 0.5: + return "good" + return "junk" + +You can also decorate functions with :func:`graphbook.step` multiple times to define different events for the same step. + +.. code-block:: python + + @step("MyStep") # on_note + def my_step(ctx, note: Note): + ... + + @step("MyStep", event="__init__") + def my_step_init(ctx): + ... + + @step("MyStep", event="forward_note") + def my_step_forward(ctx, note: Note): + ... + + @step("MyStep", event="on_clear") + def my_step_clear(ctx): + ... + +.. seealso:: + + :class:`graphbook.steps.Step` for more overrideable events. + +Resources +========= + +Resources are not part of the flow of notes but can hold Python objects such as PyTorch models that can be used by other steps. +You can create a resource node by using the :func:`graphbook.resource` decorator or by inheriting from the class :class:`graphbook.steps.Resource`. + +.. tab-set:: + + .. tab-item:: function (recommended) + + .. code-block:: python + :caption: custom_nodes/my_first_resource.py + + from graphbook import resource + import torch + + @resource("MyModel") + def my_first_resource(ctx): + return torch.nn.Linear(10, 1) + + .. tab-item:: class + + .. code-block:: python + :caption: custom_nodes/my_first_resource.py + + from graphbook.steps import Resource + import torch + + class MyModel(Resource): + Category = "" + Parameters = {} + def __init__(self): + super().__init__() + self.model = torch.nn.Linear(10, 1).to("cuda") + + def value(self): + return self.model + +You can access this resource in your step by setting a parameter that accepts a "resource" like so: + +.. tab-set:: + + .. tab-item:: function (recommended) + + .. code-block:: python + :caption: custom_nodes/my_steps.py + + @step("MyStep") + @param("model", type="resource") + def my_step(ctx, note: Note): + model = ctx.model + ... + + .. tab-item:: class + + .. code-block:: python + :caption: custom_nodes/my_steps.py + + class MyStep(Step): + RequiresInput = True + Parameters = { + "model": {"type": "resource"} + } + Outputs = ["out"] + Category = "" + def __init__(self, model): + super().__init__() + self.model = model + + def on_note(self, note: Note) -> str: + model = self.model + ... + +Resources can also have parameters. + +.. tab-set:: + + .. tab-item:: function (recommended) + + .. code-block:: python + :caption: custom_nodes/my_first_resource.py + + from graphbook import resource, param + import torch + + @resource("MyModel") + @param("model_id", type="string", default="model_1") + def my_first_resource(ctx): + model = MyPytorchModel() + model.load_state_dict(torch.load(ctx.model_id)).to("cuda") + model.eval() + return model + + .. tab-item:: class + + .. 
code-block:: python + :caption: custom_nodes/my_first_resource.py + + from graphbook.steps import Resource + import torch + + class MyModel(Resource): + Category = "" + Parameters = { + "model_id": {"type": "string", "default": "model_1"} + } + def __init__(self, model_id): + super().__init__() + model = MyPytorchModel() + model.load_state_dict(torch.load(model_id)).to("cuda") + model.eval() + self.model = model + + def value(self): + return self.model + +Categories +========== + +You can organize your steps and resources better by assigning them to different categories. + +.. tab-set:: + + .. tab-item:: function (recommended) + + .. code-block:: python + :caption: custom_nodes/my_steps.py + + @step("Custom/MyStep") + def my_step(ctx, note: Note): + ... + + .. tab-item:: class + + .. code-block:: python + :caption: custom_nodes/my_steps.py + + class MyStep(Step): + ... + Category = "Custom" + ... + +Categories can be multi-leveled with more forward slashes. + +.. tab-set:: + + .. tab-item:: function (recommended) + + .. code-block:: python + + @step("Custom/Filtering/A") + def a(ctx, note: Note): + ... + + @step("Custom/Producer/B") + def b(ctx, note: Note): + ... + + .. tab-item:: class + + .. code-block:: python + + class A(Step): + ... + Category = "Custom/Filtering" + ... + + class B(Step): + ... + Category = "Custom/Producer" + ... + +.. warning:: + + Even though 2 steps can have different categories, the step name (basename) must be unique. + + Example: + + .. code-block:: python + + # Not OK + + @step("Custom/Filtering/A") + def a(ctx, note: Note): + ... + + @step("Custom/Producer/A") # Will override the previous step + def b(ctx, note: Note): + ... \ No newline at end of file diff --git a/docs/learn/batch.rst b/docs/learn/batch.rst new file mode 100644 index 0000000..4c6c527 --- /dev/null +++ b/docs/learn/batch.rst @@ -0,0 +1,193 @@ +.. meta:: + :description: Learn how to batch data with Graphbook's custom muiltiprocessing capabilities ensuring that your GPU is efficiently utilized. + :twitter:description: Learn how to batch data with Graphbook's custom muiltiprocessing capabilities ensuring that your GPU is efficiently utilized. + +Batch Data +########## + +One of the most important features offered by Graphbook is its multiprocessing capabilities. +Graphbook has a worker pool that can be used to parallelize the loading of data with your own custom defined function. +You can also use it to parallelize the writing of outputs, but that is covered in the next section. + +.. seealso:: + + :ref:`Workers` - Learn more about the workers behind your pipeline. + +Load and Batch Data +=================== + +In this section, we will cover the decorator :func:`graphbook.batch`, and how to use it to parallelize the loading of data. +For example, to create a batch step that loads images from the file system and convert them to PyTorch Tensors, you can use the following code: + +.. tab-set:: + + .. tab-item:: function (recommended) + + .. 
code-block:: python + :caption: custom_nodes/batch_steps.py + + from graphbook import Note, step, batch + from PIL import Image + from typing import List + import torch + import torchvision.transforms.functional as F + + # Custom defined function that will execute in parallel + @staticmethod + def convert_to_tensor(item: dict) -> torch.Tensor: + image_path = item["value"] + pil_image = Image.open(image_path) + return F.to_tensor(pil_image) + + @step("LoadImages") + @batch(8, "image_paths", load_fn=convert_to_tensor) + @staticmethod + def on_load_images(tensors: List[torch.Tensor], items: List[dict], notes: List[Note]): + for tensor, note in zip(tensors, notes): + if note["tensor"] is None: + note["tensor"] = [] + note["tensor"].append(tensor) + + .. tab-item:: class + + .. code-block:: python + + from graphbook.steps import BatchStep + from graphbook import Note + from PIL import Image + from typing import List + import torch + import torchvision.transforms.functional as F + + class LoadImages(BatchStep): + RequiresInput = True + Parameters = { + "batch_size": {"type": "number", "default": 8}, + "item_key": {"type": "string", "default": "image_paths"} + } + Outputs = ["out"] + Category = "" + + def __init__(self, batch_size, item_key): + super().__init__(batch_size, item_key) + + # Custom defined function that will execute in parallel + @staticmethod + def load_fn(item: dict) -> torch.Tensor: + image_path = item["value"] + pil_image = Image.open(image_path) + return F.to_tensor(pil_image) + + @staticmethod + def on_item_batch(tensors: List[torch.Tensor], items: List[dict], notes: List[Note]): + for tensor, note in zip(tensors, notes): + if note["tensor"] is None: + note["tensor"] = [] + note["tensor"].append(tensor) + +The above step simply loads images from the file system and converts them to PyTorch Tensors assuming that the notes containing the image paths come from another :ref:`source step`. + +Here is a breakdown of what we did: + +#. First, we defined a custom function ``convert_to_tensor`` that will execute in parallel. This function takes the input item that is specified by our batch step. +#. We give a name to our step "LoadImages". +#. We use the :func:`graphbook.batch` decorator to specify that this step is a batch step. The first parameter is the default batch size, the second parameter is the item key from the expected notes that we will use, and the third parameter is the function that we defined in the first step. + + .. note:: + + The first two parameters ``batch_size`` and ``item_key`` will be configurable in the UI. + If you are designing the step as a class, you must manually define these parameters. + +#. We mark the decorated method as static, because we do not care about the underlying class instance. +#. We define the :meth:`graphbook.steps.BatchStep.on_item_batch` method that will be executed which simply assigns the output tensors to the notes that they came from. + +.. tip:: + + A batch step decorates :meth:`graphbook.steps.BatchStep.on_item_batch` by default. + This method is executed with the following parameters, respectively: + + * The tensors (or whatever we output from out defined function) + * The associated input item + * The associated note that it came from + + All three lists should be of size equal to the batch size. + +Passing Data to an ML Model +=========================== + +Of course, if you're batching inputs such as tensors, you are most likely preparing them to be loaded into the GPU to pass them into an ML model. 
By immediately passing your tensors to the model, we can avoid the large memory overhead of storing the tensors in the notes.
You can do so with the following example:


.. tab-set::

    .. tab-item:: function (recommended)

        .. code-block:: python
            :caption: custom_nodes/batch_steps.py

            from graphbook import Note, step, batch, param
            from typing import List
            import torch

            @step("MyMLModel")
            @batch(8, "image_paths", load_fn=convert_to_tensor)
            @param("model", type="resource")
            @torch.no_grad()
            def on_load_images(ctx, images: List[torch.Tensor], items: List[dict], notes: List[Note]):
                batch = torch.stack(images).to("cuda")
                outputs = ctx.model(batch)

                # (Option 1) Store the model's outputs in the items
                for output, item in zip(outputs, items):
                    item["output"] = output

                # (Option 2) Store the model's outputs in the note
                for output, note in zip(outputs, notes):
                    if note["output"] is None:
                        note["output"] = []
                    note["output"].append(output)

    .. tab-item:: class

        .. code-block:: python

            from graphbook.steps import BatchStep
            from graphbook import Note
            from typing import List
            import torch

            class MyMLModel(BatchStep):
                RequiresInput = True
                Parameters = {
                    "batch_size": {"type": "number", "default": 8},
                    "item_key": {"type": "string", "default": "image_paths"},
                    "model": {"type": "resource"}
                }
                Outputs = ["out"]
                Category = ""

                def __init__(self, batch_size, item_key, model):
                    super().__init__(batch_size, item_key)
                    self.model = model

                ...

                @torch.no_grad()
                def on_item_batch(self, images: List[torch.Tensor], items: List[dict], notes: List[Note]):
                    batch = torch.stack(images).to("cuda")
                    outputs = self.model(batch)

                    # (Option 1) Store the model's outputs in the items
                    for output, item in zip(outputs, items):
                        item["output"] = output

                    # (Option 2) Store the model's outputs in the note
                    for output, note in zip(outputs, notes):
                        if note["output"] is None:
                            note["output"] = []
                        note["output"].append(output)

The example above assumes that there is already a resource containing a model, loaded into the GPU, that can be used to process the images.

diff --git a/docs/learn/document.rst b/docs/learn/document.rst
new file mode 100644
index 0000000..ce6d739
--- /dev/null
+++ b/docs/learn/document.rst
@@ -0,0 +1,136 @@
.. meta::
    :description: Learn how to write documentation for individual nodes and workflows, so that it displays right in the UI, giving your users the information they need to start working.
    :twitter:description: Learn how to write documentation for individual nodes and workflows, so that it displays right in the UI, giving your users the information they need to start working.

Document
########

You can document nodes and workflows in Graphbook so that the documentation displays right in the UI, giving your users the information they need to start working.
Node documentation can be written in a docstring, and workflow documentation can be written in a separate .md file.

Node Documentation
==================

You can document your nodes by writing a docstring right beneath the function header or inside the ``__init__`` method of class-based nodes.

.. tab-set::

    .. tab-item:: function (recommended)

        .. 
code-block:: python + :caption: custom_nodes/documented_nodes.py + + from graphbook import step, param, output, Note + + @step("Filter", event="forward_note") + @param("threshold", type="number", default=0.5) + @output("good", "junk") + def filter(ctx, note: Note): + """ + Filter notes based on a threshold value. + + Parameters: + threshold (number): The threshold value to filter notes. + + Outputs: + good (list): Notes that pass the threshold. + junk (list): Notes that fail the threshold. + """ + if note['value'] > ctx.threshold: + return { "good": [note] } + return { "junk": [note] } + + .. tab-item:: class + + .. code-block:: python + :caption: custom_nodes/documented_nodes.py + + from graphbook.steps import Step + from graphbook import Note + + class Filter(Step): + RequiresInput = True + Parameters = { + "threshold": { + "type": "number", + "default": 0.5 + } + } + Outputs = ["good", "junk"] + Category = "" + def __init__(self, threshold): + """ + Filter notes based on a threshold value. + + Parameters: + threshold (number): The threshold value to filter notes. + + Outputs: + good (list): Notes that pass the threshold. + junk (list): Notes that fail the threshold. + """ + super().__init__() + self.threshold = threshold + + def forward_note(self, note: Note) -> str: + if note['value'] > self.threshold: + return { "good": [note] } + return { "junk": [note] } + +Similarly, for resources: + +.. tab-set:: + + .. tab-item:: function (recommended) + + .. code-block:: python + :caption: custom_nodes/documented_nodes.py + + from graphbook import resource, param + + @resource("Threshold") + @param("value", type="number", default=0.5) + def threshold(ctx): + """ + A threshold value. + + Parameters: + value (number): The threshold value. + """ + return ctx.value + + .. tab-item:: class + + .. code-block:: python + :caption: custom_nodes/documented_nodes.py + + from graphbook.resources import Resource + + class Threshold(Resource): + Parameters = { + "value": { + "type": "number", + "default": 0.5 + } + } + def __init__(self, value): + """ + A threshold value. + + Parameters: + value (number): The threshold value. + """ + super().__init__(value) + +.. _markdown: https://www.markdownguide.org/basic-syntax/ + +Workflow Documentation +====================== + +You can document your workflows by writing a .md file in the ``docs/`` directory. +The file should be named after the workflow name, and the content should be written in markdown_ format. + +For example: + +* Workflow file: ``MyFlow.json`` +* Documentation file: ``docs/MyFlow.md`` diff --git a/docs/learn/dump.rst b/docs/learn/dump.rst new file mode 100644 index 0000000..ccc7211 --- /dev/null +++ b/docs/learn/dump.rst @@ -0,0 +1,90 @@ +.. meta:: + :description: Learn how to write data with Graphbook's custom muiltiprocessing capabilities ensuring that your GPU is efficiently utilized. + :twitter:description: Learn how to write data with Graphbook's custom muiltiprocessing capabilities ensuring that your GPU is efficiently utilized. + +Dump Data +######### + + +You can also use the worker pool to parallelize the dumping of data to disk/network with your own custom defined function. + +.. seealso:: + + :ref:`Workers` - Learn more about the workers behind your pipeline. + +To parallelize dumping, we still need to use the decorator :func:`graphbook.batch` because dumping is made available to :class:`graphbook.steps.BatchStep`. + +.. tab-set:: + + .. tab-item:: function (recommended) + + .. 
code-block:: python
            :caption: custom_nodes/batch_steps.py

            from graphbook import Note, step, batch
            from PIL import Image
            from typing import List
            import torch
            import torchvision.transforms.functional as F

            # Custom defined function that will execute in parallel
            @staticmethod
            def save_image(image: Image.Image, output_path: str):
                image.save(output_path)

            @step("LoadImages")
            @batch(8, "image_paths", dump_fn=save_image, load_fn=convert_to_tensor)
            @staticmethod
            def on_load_images(tensors: List[torch.Tensor], items: List[dict], notes: List[Note]):
                # Generate images
                ...

                args = []
                for image, item in zip(images, items):
                    input_path = item['value']
                    output_path = input_path.replace('.jpg', '_processed.jpg')
                    args.append((image, output_path))
                return args

    .. tab-item:: class

        .. code-block:: python

            from graphbook.steps import BatchStep
            from graphbook import Note
            from PIL import Image
            from typing import List
            import torch
            import torchvision.transforms.functional as F

            class LoadImages(BatchStep):
                RequiresInput = True
                Parameters = {
                    "batch_size": {"type": "number", "default": 8},
                    "item_key": {"type": "string", "default": "image_paths"}
                }
                Outputs = ["out"]
                Category = ""

                def __init__(self, batch_size, item_key):
                    super().__init__(batch_size, item_key)

                # Custom defined function that will execute in parallel
                @staticmethod
                def dump_fn(image: Image.Image, output_path: str):
                    image.save(output_path)

                @staticmethod
                def on_item_batch(tensors: List[torch.Tensor], items: List[dict], notes: List[Note]):
                    # Generate images
                    ...

                    args = []
                    for image, item in zip(images, items):
                        input_path = item['value']
                        output_path = input_path.replace('.jpg', '_processed.jpg')
                        args.append((image, output_path))
                    return args

Here, we define a custom dump function, passed as ``dump_fn`` to :func:`graphbook.batch` (or overridden as :meth:`graphbook.steps.BatchStep.dump_fn` in the class-based version), that saves images to disk in parallel with the main process.
The event :meth:`graphbook.steps.BatchStep.on_item_batch` can return a list of argument tuples, and each element is passed to ``dump_fn(*args)``.

diff --git a/docs/learn/first-workflow.rst b/docs/learn/examples/hello-world.rst
similarity index 80%
rename from docs/learn/first-workflow.rst
rename to docs/learn/examples/hello-world.rst
index 2e2fda0..3785e96 100644
--- a/docs/learn/first-workflow.rst
+++ b/docs/learn/examples/hello-world.rst
@@ -1,36 +1,15 @@
 .. meta::
-    :description: Learn how to create your first workflow in Graphbook.
-    :twitter:description: Learn how to create your first workflow in Graphbook.
+    :description: Follow this simple guide to get started with Graphbook. Learn how to create your first workflow, connect nodes together, and process data.
+    :twitter:description: Follow this simple guide to get started with Graphbook. Learn how to create your first workflow, connect nodes together, and process data.
 
-Create Your First Workflow
-##########################
+Hello World
+###########
 
-If you haven't already, make sure to install Graphbook by following the installation :ref:`guide`.
-Once Graphbook is installed, execute ``graphbook`` in a directory that you want to create your workflows in.
-Also, because workflow files are serialized as **.json** files, it is also recommended to track your workflows and custom nodes in a version control system like Git.
+This guide will walk you through creating your first Graphbook workflow. 
+If you have already followed :ref:`Basics`, you should be familiar with the basics of Graphbook, and we recommend that you skip this section. -.. code-block:: console - - $ mkdir my_graphbook_project - $ cd my_graphbook_project - $ git init - $ graphbook - -Executing ``graphbook`` should automatically create a directory called `workflow` containing two subdirectories called `custom_nodes` and `docs` like so: - -:: - - my_graphbook_project - └── workflow - ├── custom_nodes - └── docs - -All of your custom nodes should be located inside of `workflow/custom_nodes`. -Graphbook is tracking that directory for any files ending with **.py** and will automatically detect classes that inherit from **Step** or **Resource** and functions defined and decorated with **@step** or **@resource**. - - -Write Your First Graphbook Step -=============================== +Create a Step +============= Inside of your custom nodes directory, create a new Python file called `my_first_nodes.py`, and create the below step inside of it: diff --git a/docs/learn/image-segmentation.rst b/docs/learn/examples/image-segmentation.rst similarity index 100% rename from docs/learn/image-segmentation.rst rename to docs/learn/examples/image-segmentation.rst diff --git a/docs/learn/examples/index.rst b/docs/learn/examples/index.rst new file mode 100644 index 0000000..cc24496 --- /dev/null +++ b/docs/learn/examples/index.rst @@ -0,0 +1,14 @@ +.. meta:: + :description: Learn Graphbook by example. Discover how to create practical workflows to speed up your data processing. + :twitter:description: Learn Graphbook by example. Discover how to create practical workflows to speed up your data processing. + +Learn by Example +################ + +.. toctree:: + :caption: In this section + :titlesonly: + + hello-world + pokemon-classifier + image-segmentation diff --git a/docs/learn/pokemon-classifier.rst b/docs/learn/examples/pokemon-classifier.rst similarity index 100% rename from docs/learn/pokemon-classifier.rst rename to docs/learn/examples/pokemon-classifier.rst diff --git a/docs/learn/filter.rst b/docs/learn/filter.rst new file mode 100644 index 0000000..197f7e2 --- /dev/null +++ b/docs/learn/filter.rst @@ -0,0 +1,312 @@ +.. meta:: + :description: How to filter data with Graphbook. Learn how we can filter notes and handle notes differently as they traverse different branches. + :twitter:description: How to filter data with Graphbook. Learn how we can filter notes and handle notes differently as they traverse different branches. + +.. _Filter: + +Filter +###### + +Filtering data based on certain conditions and criteria is a common operation in data processing pipelines. +Graphbook provides a way to filter notes and handle notes differently as they traverse different branches. +Every step can have multiple output slots, and each slot can be used to route notes to different parts of the graph. +You can route a note to a specific output with the :meth:`graphbook.steps.Step.forward_note` method or event. + +Filter Notes +============ + +.. tip:: + + The default output slot is "out". To create other output slots, use the :func:`graphbook.output` decorator. + +.. tab-set:: + + .. tab-item:: function (recommended) + + .. 
code-block:: python
            :caption: custom_nodes/filter_nodes.py

            from graphbook import step, param, output, Note

            @step("Filter", event="forward_note")
            @param("threshold", type="number", default=0.5)
            @output("good", "junk")
            def filter(ctx, note: Note):
                if note['value'] > ctx.threshold:
                    return { "good": [note] }
                return { "junk": [note] }

    .. tab-item:: class

        .. code-block:: python
            :caption: custom_nodes/filter_nodes.py

            from graphbook.steps import Step
            from graphbook import Note

            class Filter(Step):
                RequiresInput = True
                Parameters = {
                    "threshold": {
                        "type": "number",
                        "default": 0.5
                    }
                }
                Outputs = ["good", "junk"]
                Category = ""
                def __init__(self, threshold):
                    super().__init__()
                    self.threshold = threshold

                def forward_note(self, note: Note) -> str:
                    if note['value'] > self.threshold:
                        return { "good": [note] }
                    return { "junk": [note] }

Alternatively, you can return just the output slot name as a shorthand way of routing the note.


.. tab-set::

    .. tab-item:: function (recommended)

        .. code-block:: python
            :caption: custom_nodes/filter_nodes.py

            ...

            def filter(ctx, note: Note):
                if note['value'] > ctx.threshold:
                    return "good"
                return "junk"

    .. tab-item:: class

        .. code-block:: python
            :caption: custom_nodes/filter_nodes.py

            ...

            def forward_note(self, note: Note) -> str:
                if note['value'] > self.threshold:
                    return "good"
                return "junk"

Filter Based on a Given Function
================================

You can also filter notes based on a given function with the resource :class:`graphbook.resources.FunctionResource`.
In the UI, a user can create such a resource and provide it as a parameter to the step.
To use it, you can call the function with the note as an argument and return its output like so:

.. tab-set::

    .. tab-item:: function (recommended)

        .. code-block:: python
            :caption: custom_nodes/filter_nodes.py

            from graphbook import step, param, output, Note
            from graphbook.utils import transform_function_string

            @step("Filter", event="__init__")
            def setup_fn(ctx, fn: str):
                ctx.fn = transform_function_string(ctx.fn)

            @step("Filter", event="forward_note")
            @param("fn", type="resource")
            @output("TRUE", "FALSE")
            def filter(ctx, note: Note):
                split_result = ctx.fn(note=note)
                if split_result:
                    return "TRUE"
                return "FALSE"

    .. tab-item:: class

        .. code-block:: python
            :caption: custom_nodes/filter_nodes.py

            from graphbook.steps import Step
            from graphbook import Note
            from graphbook.utils import transform_function_string

            class Filter(Step):
                RequiresInput = True
                Parameters = {"fn": {"type": "resource"}}
                Outputs = ["TRUE", "FALSE"]
                Category = ""

                def __init__(self, fn):
                    super().__init__()
                    self.fn = transform_function_string(fn)

                def forward_note(self, note) -> str:
                    split_result = self.fn(note=note)
                    if split_result:
                        return "TRUE"
                    return "FALSE"

.. tip::

    The above Step is essentially already implemented in Graphbook as the built-in step :class:`graphbook.steps.Split`.

Delete Notes
============

Graphbook automatically keeps all output Notes in memory for visualization and monitoring, but sometimes you don't want to retain filtered Notes at all.
By deleting them, you can conserve memory as long as you're sure you won't need them later in the workflow.

.. warning::

    You will not be able to view deleted notes in the Graphbook UI.

.. tab-set::

    .. tab-item:: function (recommended)

        .. 
code-block:: python + :caption: custom_nodes/filter_nodes.py + + ... + + def filter(ctx, note: Note): + if note['value'] > ctx.threshold: + return "good" + return {} # Delete the note + + .. tab-item:: class + + .. code-block:: python + :caption: custom_nodes/filter_nodes.py + + ... + + def forward_note(self, note: Note) -> str: + if note['value'] > self.threshold: + return "good" + return {} # Delete the note + +Clone and Versioning Notes +========================== + +You may want to copy and create different notes based on a single note. + +.. tab-set:: + + .. tab-item:: function (recommended) + + .. code-block:: python + :caption: custom_nodes/clone_nodes.py + + from graphbook import step, param, output, Note + import copy + + @step("Duplicate", event="forward_note") + def filter(ctx, note: Note): + note_v1 = copy.deepcopy(note) + note_v1['version'] = 1 + note_v2 = copy.deepcopy(note) + note_v2['version'] = 2 + + return { "out": [note_v1, note_v2] } + + .. tab-item:: class + + .. code-block:: python + :caption: custom_nodes/filter_nodes.py + + from graphbook.steps import Step + from graphbook import Note + import copy + + class Duplicate(Step): + RequiresInput = True + Parameters = {} + Outputs = ["out"] + Category = "" + def __init__(self): + super().__init__() + + def forward_note(self, note: Note) -> str: + note_v1 = copy.deepcopy(note) + note_v1['version'] = 1 + note_v2 = copy.deepcopy(note) + note_v2['version'] = 2 + + return { "out": [note_v1, note_v2] } + +Being able to generate new notes from a single note can be useful if the entity that the note describes can be split into two or more entities. +For example, if a note contains multiple images and the images may be associated with completely different entities, you can split them into those entities, so that we maintain a one-to-one relationship between the note and the entity. + +.. tab-set:: + + .. tab-item:: function (recommended) + + .. code-block:: python + :caption: custom_nodes/filter_nodes.py + + from graphbook import step, param, output, Note + import copy + + @step("FixImages", event="forward_note") + @output("dog", "cat") + def filter(ctx, note: Note): + dog_images = [] + cat_images = [] + for image in note['images']: + if image['prediction'] == 'dog': + dog_images.append(image) + else: + cat_images.append(image) + + outputs = {} + if len(dog_images) > 0: + note_dog = copy.deepcopy(note) + note_dog['images'] = dog_images + outputs["dog"] = [note_dog] + if len(cat_images) > 0: + note_cat = copy.deepcopy(note) + note_cat['images'] = cat_images + outputs["cat"] = [note_cat] + + return outputs + + .. tab-item:: class + + .. 
code-block:: python + :caption: custom_nodes/filter_nodes.py + + from graphbook.steps import Step + from graphbook import Note + + class FixImages(Step): + RequiresInput = True + Parameters = { + } + Outputs = ["dog", "cat"] + Category = "" + def __init__(self): + super().__init__() + + def forward_note(self, note: Note) -> str: + dog_images = [] + cat_images = [] + for image in note['images']: + if image['prediction'] == 'dog': + dog_images.append(image) + else: + cat_images.append(image) + + outputs = {} + if len(dog_images) > 0: + note_dog = copy.deepcopy(note) + note_dog['images'] = dog_images + outputs["dog"] = [note_dog] + if len(cat_images) > 0: + note_cat = copy.deepcopy(note) + note_cat['images'] = cat_images + outputs["cat"] = [note_cat] + + return outputs + diff --git a/docs/learn/index.rst b/docs/learn/index.rst index 5a7eb66..51b8f74 100644 --- a/docs/learn/index.rst +++ b/docs/learn/index.rst @@ -8,30 +8,18 @@ Learn All of the guides in this section are meant to help you get started with building ML workflows in Graphbook. The resulting workflows and custom nodes can be found in :ref:`examples`. -Introduction -============ - -Keep in mind that when working with Graphbook, the development cycle in building a workflow can be illustrated in a few simple steps: - -#. **Build in Python** - - Write processing nodes using Python in your favorite code editor - -#. **Assemble in Graphbook** - - Assemble an ML workflow in the Graphbook web UI with your own processing nodes - -#. **Execute** - - Run, monitor, and adjust parameters in your workflow - ---- .. toctree:: :caption: In this section + :titlesonly: concepts - first-workflow + basics load-images - pokemon-classifier - image-segmentation + prompt + filter + document + batch + dump + examples/index diff --git a/docs/learn/load-images.rst b/docs/learn/load-images.rst index ba252da..cbc3122 100644 --- a/docs/learn/load-images.rst +++ b/docs/learn/load-images.rst @@ -2,6 +2,8 @@ :description: Learn how to load a source of images into your data processing pipelines. :twitter:description: Learn how to load a source of images into your data processing pipelines. +.. _Load Images: + Load Images ########### diff --git a/docs/learn/prompt.rst b/docs/learn/prompt.rst new file mode 100644 index 0000000..5a4f2f2 --- /dev/null +++ b/docs/learn/prompt.rst @@ -0,0 +1,179 @@ +.. meta:: + :description: Learn how to prompt users during executing of your data processing pipelines. Prompting can be useful for collecting user input, labeling data, or for providing feedback during execution. + :twitter:description: Learn how to prompt users during executing of your data processing pipelines. Prompting can be useful for collecting user input, labeling data, or for providing feedback during execution. + +Prompts +####### + +Graphbook supports human-in-the-loop steps with prompts. +Prompting can be useful for collecting user input, labeling data, or for providing feedback during execution. +You can make your own prompts with the help of the :func:`graphbook.prompt` decorator or by inheriting from the :class:`graphbook.steps.PromptStep` class. + +.. seealso:: + + :mod:`graphbook.prompts` - List of available prompts in Graphbook. + +Prompt for Boolean Feedback +=========================== + +You can prompt users for boolean feedback with the :func:`graphbook.prompts.bool_prompt` function. +For example, you can ask users if an image is a dog and assign a label based on the user's response. + +.. tab-set:: + + .. tab-item:: function (recommended) + + .. 
code-block:: python
            :caption: custom_nodes/prompted_nodes.py

            from graphbook import step, prompt, Note
            from graphbook.prompts import bool_prompt

            @staticmethod
            def is_dog(note: Note):
                return bool_prompt(note, msg="Is this a dog?", show_images=True)

            @step("Prompts/CatVsDog")
            @prompt(is_dog)
            @staticmethod
            def label_images(note: Note, response: str):
                note["label"] = "dog" if response == "Yes" else "cat"

    .. tab-item:: class

        .. code-block:: python
            :caption: custom_nodes/prompted_nodes.py

            from typing import Any
            from graphbook.steps import PromptStep
            from graphbook import Note
            from graphbook.prompts import bool_prompt

            class CatVsDog(PromptStep):
                RequiresInput = True
                Parameters = {
                }
                Outputs = ["out"]
                Category = "Prompts"
                def __init__(self):
                    super().__init__()

                def get_prompt(self, note: Note): # Override
                    return bool_prompt(note, msg="Is this a dog?", show_images=True)

                def on_prompt_response(self, note: Note, response: Any): # Override
                    note["label"] = "dog" if response == "Yes" else "cat"


The function that is decorated is called when a response is received from the prompt.
The decorator accepts a function that returns a Graphbook prompt object.
If no argument is passed to the decorator, the step will always send a boolean prompt with the default parameters.

Prompt for Multiple Options
===========================

You can also prompt users for multiple options with the :func:`graphbook.prompts.selection_prompt` function.

.. tab-set::

    .. tab-item:: function (recommended)

        .. code-block:: python
            :caption: custom_nodes/prompted_nodes.py

            from graphbook import step, prompt, Note
            from graphbook.prompts import selection_prompt

            @staticmethod
            def select_option(note: Note):
                return selection_prompt(
                    note,
                    msg="Select an option",
                    options=["airplane", "bicycle", "car", "train", "junk data"],
                )

            @step("Prompts/Vehicles")
            @prompt(select_option)
            def label_vehicle(ctx, note: Note, response: str):
                ctx.log("Selected option: ", response)
                note["label"] = response

    .. tab-item:: class

        .. code-block:: python
            :caption: custom_nodes/prompted_nodes.py

            from typing import Any
            from graphbook.steps import PromptStep
            from graphbook import Note
            from graphbook.prompts import selection_prompt

            class Vehicles(PromptStep):
                RequiresInput = True
                Parameters = {
                }
                Outputs = ["out"]
                Category = "Prompts"
                def __init__(self):
                    super().__init__()

                def get_prompt(self, note: Note): # Override
                    return selection_prompt(
                        note,
                        msg="Select an option",
                        options=["airplane", "bicycle", "car", "train", "junk data"],
                    )

                def on_prompt_response(self, note: Note, response: Any): # Override
                    self.log("Selected option: ", response)
                    note["label"] = response

Conditional Prompts
===================

You can also conditionally prompt users based on the data inside the note.
For example, you can prompt users only if the model is not confident about its prediction, based on its logits or some other metric.
To skip the prompt for a given note, simply return ``None``.

.. tab-set::

    .. tab-item:: function (recommended)

        .. code-block:: python
            :caption: custom_nodes/prompted_nodes.py

            from graphbook import step, prompt, Note
            from graphbook.prompts import bool_prompt

            @staticmethod
            def is_dog(note: Note):
                if note["model_confidence"] < 0.8:
                    return bool_prompt(note, msg=f"Model predicted {note['label']}. 
Is this correct?", show_images=True) + return None + + @step("Prompts/ConditionalCatVsDog") + @prompt(is_dog) + @staticmethod + def label_images(note: Note, response: str): + if response == "No": + note["label"] = "dog" if note["label"] == "cat" else "cat" + + .. tab-item:: class + + .. code-block:: python + :caption: custom_nodes/prompted_nodes.py + + from graphbook.prompts import bool_prompt + + class ConditionalCatVsDog(PromptStep): + RequiresInput = True + Parameters = { + } + Outputs = ["out"] + Category = "Prompts" + def __init__(self): + super().__init__() + + def get_prompt(self, note: Note): # Override + if note["model_confidence"] < 0.8: + return bool_prompt(note, msg=f"Model predicted {note['label']}. Is this correct?", show_images=True) + return None + + def on_prompt_response(self, note: Note, response: Any): # Override + if response == "No": + note["label"] = "dog" if note["label"] == "cat" else "cat" diff --git a/docs/reference/decorators.rst b/docs/reference/decorators.rst index 4c81fb8..470f644 100644 --- a/docs/reference/decorators.rst +++ b/docs/reference/decorators.rst @@ -5,6 +5,10 @@ Decorators ########## +.. rst-class:: lead + + This section contains detailed reference documentation for working with Graphbook decorators, the recommended way. + .. autodecorator:: graphbook.step .. autodecorator:: graphbook.batch diff --git a/docs/reference/index.rst b/docs/reference/index.rst index 51ce20a..548c7cd 100644 --- a/docs/reference/index.rst +++ b/docs/reference/index.rst @@ -19,5 +19,6 @@ Reference resources notes decorators + params prompts utils diff --git a/docs/reference/notes.rst b/docs/reference/notes.rst index 459c18d..adafc81 100644 --- a/docs/reference/notes.rst +++ b/docs/reference/notes.rst @@ -5,4 +5,8 @@ Notes ##### +.. rst-class:: lead + + Below delivers the documentation details for Note, the atomical unit of data that flows across a Graphbook pipeline. + .. autoclass:: graphbook.Note diff --git a/docs/reference/params.rst b/docs/reference/params.rst new file mode 100644 index 0000000..e4b2c13 --- /dev/null +++ b/docs/reference/params.rst @@ -0,0 +1,174 @@ +.. meta:: + :description: Graphbook Reference Documentation for parameters. A list of the available parameter types that can be used in Graphbook. + :twitter:description: Graphbook Reference Documentation for parameters. A list of the available parameter types that can be used in Graphbook. + +Parameters +########## + +.. rst-class:: lead + + Graphbook supports a variety of parameter types that can be used to define the inputs and outputs of your steps. + +.. _Available Parameters: + +Available Parameters +==================== + +Below is a list of the available values that can be passed as a parameter type in Graphbook: + +* string +* number +* boolean +* function +* list[string] +* list[number] +* list[boolean] +* list[function] +* dict +* `resource`\* + +\* Any type that is not (string..dict) will default to `resource`. +A resource does not correspond to a widget type but is used to indicate that the parameter accepts resource nodes. + +Examples +======== + +Below shows an example for string and number parameters. Multiple parameters can be used at the same time: + +.. tab-set:: + + .. tab-item:: function (recommended) + + .. 
code-block:: python + :caption: custom_nodes/my_steps.py + + from graphbook import Note, step, param + + @step("MyStep") + @param("message", type="string", default="Hello, World!") + @param("offset", type="number", default=0) + def my_step(ctx, note: Note): + note["message"] = ctx.message + note["offset"] = ctx.offset + + .. tab-item:: class + + .. code-block:: python + :caption: custom_nodes/my_steps.py + + from graphbook.steps import Step + from graphbook import Note + + class MyStep(Step): + RequiresInput = True + Parameters = { + "message": { + "type": "string", + "default": "Hello, World!" + }, + "offset": { + "type": "number", + "default": 0 + } + } + Outputs = ["out"] + Category = "" + def __init__(self, message, offset): + super().__init__() + self.message = message + self.offset = offset + + def on_note(self, note: Note) -> str: + note["message"] = self.message + note["offset"] = self.offset + +Below shows an example for a list of strings: + +.. tab-set:: + + .. tab-item:: function (recommended) + + .. code-block:: python + :caption: custom_nodes/my_steps.py + + from graphbook import Note, step, param + + @step("MyStep") + @param("cars", type="list[string]", default=["car", "truck"]) + def my_step(ctx, note: Note): + # Access from ctx.cars + ... + + .. tab-item:: class + + .. code-block:: python + :caption: custom_nodes/my_steps.py + + from graphbook.steps import Step + from graphbook import Note + + class MyStep(Step): + RequiresInput = True + Parameters = { + "cars": { + "type": "list[string]", + "default": ["car", "truck"] + }, + } + Outputs = ["out"] + Category = "" + def __init__(self, cars): + super().__init__() + self.cars = cars + +Below shows an example for a dictionary: + +.. tab-set:: + + .. tab-item:: function (recommended) + + .. code-block:: python + :caption: custom_nodes/my_steps.py + + from graphbook import Note, step, param + + @step("MyStep") + @param("car", type="dict", default={ + "make": "Toyota", + "model": "Camry", + "price": 25000, + "in_stock": True + }) + def my_step(ctx, note: Note): + # Access from ctx.car + ... + + .. tab-item:: class + + .. code-block:: python + :caption: custom_nodes/my_steps.py + + from graphbook.steps import Step + from graphbook import Note + + class MyStep(Step): + RequiresInput = True + Parameters = { + "car": { + "type": "dict", + "default": { + "make": "Toyota", + "model": "Camry", + "price": 25000, + "in_stock": True + } + }, + } + Outputs = ["out"] + Category = "" + def __init__(self, car): + super().__init__() + self.car = car + + def on_note(self, note: Note): + ... diff --git a/web/src/components/Nodes/widgets/Widgets.tsx b/web/src/components/Nodes/widgets/Widgets.tsx index 921416a..8ec3f71 100644 --- a/web/src/components/Nodes/widgets/Widgets.tsx +++ b/web/src/components/Nodes/widgets/Widgets.tsx @@ -78,7 +78,7 @@ export function BooleanWidget({ name, def, onChange, style }) { return }, [style, def]); return ( - + {name} {input}