Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Multi-hop RAG opt + improvement] [V0.3.0 Cycle Graph] #223

Merged
merged 26 commits into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
ba35ac4
fix the api in the word count code and generate better diagnose report
liyin2015 Sep 23, 2024
15e1ab4
multi-hop retriever, design choice with cycle graph
liyin2015 Sep 24, 2024
5e71fd2
structure the component trace such as input_args, full_response, and …
liyin2015 Sep 24, 2024
3f6e34d
replace the retriever in vanilla rag with the multi-hop rag works well
liyin2015 Sep 24, 2024
c05b742
delete type_extension
liyin2015 Oct 1, 2024
4cd811e
commit
liyin2015 Oct 6, 2024
2ff5aac
Merge branch 'main' into li
liyin2015 Oct 26, 2024
ae5db35
able to diagnose a multi-hop rag with end to end eval, but the trace …
liyin2015 Oct 26, 2024
90959f4
used ComponentList to replace normal List, and make it visible to com…
liyin2015 Oct 27, 2024
485bae7
fix the generator logger_metadata.json to include all generator recur…
liyin2015 Oct 27, 2024
e69a353
merge to main
liyin2015 Oct 27, 2024
2fc67d3
able to train multi-hop rag with demo, the backtrace of dataclass and…
liyin2015 Oct 28, 2024
2cc4196
rebase to main and format .ipynb
liyin2015 Dec 5, 2024
4e58f9a
add data pipeline in the main
liyin2015 Dec 5, 2024
27e8be0
add rag in the tutorials code
liyin2015 Dec 10, 2024
0f96139
make the role of the loss component clear
liyin2015 Dec 11, 2024
fbe3348
ensure when the generator fails in the middle of a map, use the raw_r…
liyin2015 Dec 12, 2024
343fc63
fully fix the bug
liyin2015 Dec 12, 2024
12a3ded
first end to end optimization on the task description using text grad…
liyin2015 Dec 13, 2024
3f0c813
rebase on main
liyin2015 Dec 14, 2024
0b8705e
start to trace the dev meta prompts of backward engines and the tgd o…
liyin2015 Dec 15, 2024
4c8f6ba
create better debug report to show more detailed paths and files
liyin2015 Dec 15, 2024
0439354
fixed the sorting bug in the gradients and skipped the good examples …
liyin2015 Dec 16, 2024
6c5841a
wrap up multi hop rag
liyin2015 Dec 16, 2024
53171f5
fix the test issues
liyin2015 Dec 16, 2024
1ec2557
update the lock file in adalflow
liyin2015 Dec 16, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions adalflow/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## [0.2.7] - 2024-09-23

### Improved
- Better diagnose report for `Trainer.diagnose`.
- Multi-hop RAG with handling of Cycle.

## [0.2.7] - To Be Released
### Added
- `Memory` is completed with `call` and `add_dialog_turn` methods.
Expand Down
12 changes: 11 additions & 1 deletion adalflow/adalflow/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
__version__ = "0.2.6"

from adalflow.core.component import Component, fun_to_component
from adalflow.core.container import Sequential
from adalflow.core.container import Sequential, ComponentList
from adalflow.core.base_data_class import DataClass, DataClassFormatType, required_field

from adalflow.optim.grad_component import GradComponent
Expand Down Expand Up @@ -63,6 +63,10 @@
BedrockAPIClient,
)

# data pipeline
from adalflow.components.data_process.text_splitter import TextSplitter
from adalflow.components.data_process.data_components import ToEmbeddings

__all__ = [
"Component",
"fun_to_component",
Expand All @@ -72,7 +76,10 @@
"required_field",
# Container
"Sequential",
"ComponentList",
# Grad Component
"GradComponent",
# Functional Component
"ModelClient",
"Generator",
"Embedder",
Expand All @@ -99,6 +106,9 @@
"JsonOutputParser",
"ListOutputParser",
"DataClassParser",
# Data Pipeline
"TextSplitter",
"ToEmbeddings",
# Types
"GeneratorOutput",
"EmbedderOutput",
Expand Down
3 changes: 2 additions & 1 deletion adalflow/adalflow/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from .base_data_class import DataClass, required_field, DataClassFormatType

from .component import Component, FunComponent, fun_to_component
from .container import Sequential
from .container import Sequential, ComponentList
from .db import LocalDB
from .default_prompt_template import DEFAULT_ADALFLOW_SYSTEM_PROMPT
from .embedder import Embedder, BatchEmbedder
Expand Down Expand Up @@ -50,6 +50,7 @@
"LocalDB",
"Component",
"Sequential",
"ComponentList",
"FunComponent",
"fun_to_component",
"DataClass",
Expand Down
2 changes: 0 additions & 2 deletions adalflow/adalflow/core/base_data_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,8 +356,6 @@ class TrecDataList(DataClass):

return dict(ordered_dict)

return ordered_dict

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "DataClass":
"""Create a dataclass instance from a dictionary.
Expand Down
1 change: 1 addition & 0 deletions adalflow/adalflow/core/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ def use_teacher(self, mode: bool = True):
component.use_teacher(mode)
return self

# TODO: reassess trace; it should maybe be turned on all the time
def trace(self, mode: bool = True):
r"""Sets the component in tracing mode.This signal will be used in forward and backward to accumulate input and output."""
if not isinstance(mode, bool):
Expand Down
233 changes: 228 additions & 5 deletions adalflow/adalflow/core/container.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,63 @@
"""Container component for composing multiple components, such as Sequential."""

from collections import OrderedDict
"""
Container component for composing multiple components, such as Sequential
and ComponentList.

This design draws inspiration from PyTorch’s modular
container patterns, including `nn.Sequential` and `nn.ModuleList`. The
`Container` component allows for grouping several components into one, enabling
flexible and reusable model architectures.

Design Motivation:
-------------------
This implementation follows the same principles as PyTorch’s component-based
design, encouraging modularity, reusability, and extensibility. The `Container`
component provides an easy way to manage multiple layers or other components,
while ensuring that their parameters are properly registered and updated during
training.

Credits:
---------
The design of this component takes inspiration from the PyTorch project
(https://pytorch.org). PyTorch is an open-source deep learning framework,
licensed under a BSD-style license. Although this code is not part of the
official PyTorch library, it mirrors the same design principles.

For more details on PyTorch’s licensing, refer to:
https://github.com/pytorch/pytorch/blob/main/LICENSE

Usage Example:
--------------
class MyModule(nn.Module):
def __init__(self):
super().__init__()

self.model = nn.Sequential(
nn.Conv2d(1,20,5),
nn.ReLU(),
nn.Conv2d(20,64,5),
nn.ReLU()
)
self.linears = nn.ModuleList([nn.Linear(10, 10) for i in range(10)])

def forward(self, x):
# ModuleList can act as an iterable, or be indexed using ints
for i, l in enumerate(self.linears):
x = self.linears[i // 2](x) + l(x)
return x

"""

from collections import OrderedDict, abc as container_abcs
import operator
from itertools import islice
from typing import TypeVar, Dict, Union, Iterable, Iterator, Any, overload
from itertools import islice, chain
from typing import TypeVar, Dict, Union, Iterable, Iterator, Any, overload, Optional

from adalflow.core.component import Component

# Type variable bound to Component, for annotations that are generic over
# Component subclasses.
T = TypeVar("T", bound=Component)

# Names exported by `from ... import *` for this module.
__all__ = ["Sequential", "ComponentList"]


class Sequential(Component):
__doc__ = r"""A sequential container.
Expand Down Expand Up @@ -311,3 +360,177 @@ def extend(self, components: Iterable[Component]) -> "Sequential":
for component in components:
self.append(component)
return self


def _addindent(s_: str, numSpaces: int):
s = s_.split("\n")
# don't do anything for single-line stuff
if len(s) == 1:
return s_
first = s.pop(0)
s = [(numSpaces * " ") + line for line in s]
s = "\n".join(s)
s = first + "\n" + s
return s


class ComponentList(Component):
    __doc__ = r"""Holds subcomponents in a list.

    :class:`adalflow.core.ComponentList` can be indexed like a regular Python list, but
    the components it holds are properly registered, and will be visible by all
    :class:`adalflow.core.Component` methods.

    Components are stored in ``self._components`` under the string form of
    their position (``"0"``, ``"1"``, ...), so every mutating operation keeps
    those keys contiguous.

    Args:
        components (iterable, optional): an iterable of components to add

    Examples:

    .. code-block:: python

        # Example of how to use ComponentList
        class MyComponents(Component):
            def __init__(self):
                super().__init__()
                self.llms = ComponentList([adal.Generator() for i in range(10)])

            def forward(self, x):
                for llm in self.llms:
                    x = llm(x)
                return x
    """
    # NOTE(review): class-level default kept for compatibility; presumably
    # Component.__init__ assigns a per-instance mapping -- confirm.
    _components: Dict[str, Component] = OrderedDict()

    def __init__(self, components: Optional[Iterable[Component]] = None) -> None:
        """Create the list, optionally pre-populated from ``components``."""
        super().__init__()
        if components is not None:
            self += components

    def _get_abs_string_index(self, idx):
        """Return ``idx`` normalized to a non-negative index, as a string key.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        idx = operator.index(idx)
        if not (-len(self) <= idx < len(self)):
            raise IndexError(f"index {idx} is out of range")
        if idx < 0:
            idx += len(self)
        return str(idx)

    def __getitem__(self, idx: Union[int, slice]) -> Union[Component, "ComponentList"]:
        """Retrieve a component, or a new list of the same class for a slice."""
        if isinstance(idx, slice):
            return self.__class__(list(self._components.values())[idx])
        return self._components[self._get_abs_string_index(idx)]

    def __setitem__(self, idx: int, component: Component) -> None:
        """Replace the component at ``idx`` (negative indices allowed)."""
        key = self._get_abs_string_index(idx)
        # Assign through setattr so the Component attribute hook sees it.
        return setattr(self, key, component)

    def __delitem__(self, idx: Union[int, slice]) -> None:
        """Delete a component or a slice of components."""
        if isinstance(idx, slice):
            for k in range(len(self._components))[idx]:
                delattr(self, str(k))
        else:
            delattr(self, self._get_abs_string_index(idx))
        # Deletion leaves gaps in the string keys; rebuild the mapping so the
        # remaining components are numbered 0..n-1 again.
        str_indices = [str(i) for i in range(len(self._components))]
        self._components = OrderedDict(
            list(zip(str_indices, self._components.values()))
        )

    def __len__(self) -> int:
        """Return the number of components."""
        return len(self._components)

    def __iter__(self) -> Iterator[Component]:
        """Iterate over the components in index order."""
        return iter(self._components.values())

    def __iadd__(self, components: Iterable[Component]) -> "ComponentList":
        """Add multiple components using the `+=` operator."""
        return self.extend(components)

    def __add__(self, other: Iterable[Component]) -> "ComponentList":
        """Return a new ComponentList holding ``self`` followed by ``other``."""
        combined = ComponentList()
        for i, component in enumerate(chain(self, other)):
            combined.add_component(str(i), component)
        return combined

    def __repr__(self):
        """Return a repr for ComponentList that compresses runs of identical item reprs."""
        list_of_reprs = [repr(item) for item in self]
        if len(list_of_reprs) == 0:
            return self._get_name() + "()"

        # Group consecutive identical reprs into [start, end] index ranges.
        start_end_indices = [[0, 0]]
        repeated_blocks = [list_of_reprs[0]]
        for i, r in enumerate(list_of_reprs[1:], 1):
            if r == repeated_blocks[-1]:
                start_end_indices[-1][1] += 1
                continue

            start_end_indices.append([i, i])
            repeated_blocks.append(r)

        lines = []
        main_str = self._get_name() + "("
        for (start_id, end_id), b in zip(start_end_indices, repeated_blocks):
            local_repr = f"({start_id}): {b}"  # default repr

            if start_id != end_id:
                n = end_id - start_id + 1
                local_repr = f"({start_id}-{end_id}): {n} x {b}"

            local_repr = _addindent(local_repr, 2)
            lines.append(local_repr)

        main_str += "\n " + "\n ".join(lines) + "\n"
        main_str += ")"
        return main_str

    def __dir__(self):
        """List attributes, hiding the numeric keys used to store components."""
        keys = super().__dir__()
        keys = [key for key in keys if not key.isdigit()]
        return keys

    def insert(self, index: int, component: Component) -> None:
        """Insert a component before ``index``.

        ``index`` follows ``list.insert`` semantics: negative values count
        from the end and out-of-range values are clamped to the nearest end,
        so the string keys always stay contiguous.
        """
        ordered = list(self._components.values())
        ordered.insert(index, component)
        # Rebuild the mapping so keys are exactly "0".."n" in order.
        self._components = OrderedDict(
            (str(i), item) for i, item in enumerate(ordered)
        )

    def pop(self, index: Union[int, slice]) -> Union[Component, "ComponentList"]:
        """Remove and return the component (or slice of components) at ``index``."""
        component = self[index]
        del self[index]
        return component

    def append(self, component: Component) -> "ComponentList":
        """Append a component to the end of the list and return ``self``."""
        self.add_component(str(len(self)), component)
        return self

    def extend(self, components: Iterable[Component]) -> "ComponentList":
        """Extend the list by appending multiple components and return ``self``.

        Raises:
            TypeError: if ``components`` is not iterable.
        """
        if not isinstance(components, container_abcs.Iterable):
            raise TypeError(
                "ComponentList.extend should be called with an "
                "iterable, but got " + type(components).__name__
            )
        # Snapshot first so that extending with the list itself (``cl += cl``)
        # does not iterate a mapping that is being grown at the same time.
        new_components = list(components)
        offset = len(self)
        for i, component in enumerate(new_components):
            self.add_component(str(offset + i), component)
        return self


# TODO: need to do the same to ParameterList and ParameterDict, ModuleDict
Loading
Loading