[Multi-hop RAG opt + improvement] [V0.3.0 Cycle Graph] #223

Merged on Dec 16, 2024 (26 commits).

Commits:
ba35ac4: fix the api in the word count code and generate better diagnose report (liyin2015, Sep 23, 2024)
15e1ab4: multi-hop retriever, design choice with cycle graph (liyin2015, Sep 24, 2024)
5e71fd2: structure the component trace such as input_args, full_response, and … (liyin2015, Sep 24, 2024)
3f6e34d: replace the retriever in vanilla rag with the multi-hop rag works well (liyin2015, Sep 24, 2024)
c05b742: delete type_extension (liyin2015, Oct 1, 2024)
4cd811e: commit (liyin2015, Oct 6, 2024)
2ff5aac: Merge branch 'main' into li (liyin2015, Oct 26, 2024)
ae5db35: able to diagnose a multi-hop rag with end to end eval, but the trace … (liyin2015, Oct 26, 2024)
90959f4: used ComponentList to replace normal List, and make it visible to com… (liyin2015, Oct 27, 2024)
485bae7: fix the generator logger_metadata.json to include all generator recur… (liyin2015, Oct 27, 2024)
e69a353: merge to main (liyin2015, Oct 27, 2024)
2fc67d3: able to train multi-hop rag with demo, the backtrace of dataclass and… (liyin2015, Oct 28, 2024)
2cc4196: rebase to main and format .ipynb (liyin2015, Dec 5, 2024)
4e58f9a: add data pipeline in the main (liyin2015, Dec 5, 2024)
27e8be0: add rag in the tutorials code (liyin2015, Dec 10, 2024)
0f96139: make the role of the loss component clear (liyin2015, Dec 11, 2024)
fbe3348: ensure when the generator fails in the middle of a map, use the raw_r… (liyin2015, Dec 12, 2024)
343fc63: fully fix the bug (liyin2015, Dec 12, 2024)
12a3ded: first end to end optimization on the task description using text grad… (liyin2015, Dec 13, 2024)
3f0c813: rebase on main (liyin2015, Dec 14, 2024)
0b8705e: start to trace the dev meta prompts of backward engines and the tgd o… (liyin2015, Dec 15, 2024)
4c8f6ba: create better debug report to show more detailed paths and files (liyin2015, Dec 15, 2024)
0439354: fixed the sorting bug in the gradients and skipped the good examples … (liyin2015, Dec 16, 2024)
6c5841a: wrap up multi hop rag (liyin2015, Dec 16, 2024)
53171f5: fix the test issues (liyin2015, Dec 16, 2024)
1ec2557: update the lock file in adalflow (liyin2015, Dec 16, 2024)
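
The commit sequence above builds toward the headline feature: a multi-hop retriever whose query rewriter and retriever are re-entered on every hop, which is why the v0.3.0 trace is a cycle graph rather than a DAG. A minimal, library-agnostic sketch of that control flow follows; the rewrite_query and retrieve callables are hypothetical stand-ins, not AdalFlow APIs.

from typing import Callable, List

def multi_hop_retrieve(
    question: str,
    rewrite_query: Callable[[str, List[str]], str],  # hypothetical LLM-backed query rewriter
    retrieve: Callable[[str], List[str]],            # hypothetical single-hop retriever
    max_hops: int = 2,
) -> List[str]:
    """Run the same retriever in a cycle, rewriting the query each hop."""
    contexts: List[str] = []
    query = question
    for _ in range(max_hops):
        contexts.extend(retrieve(query))
        # Re-entering the same rewriter node on each hop is what turns the
        # computation graph into a cycle.
        query = rewrite_query(question, contexts)
    return contexts

# Example wiring with trivial stand-ins:
docs = multi_hop_retrieve(
    "Who advised the author of the paper?",
    rewrite_query=lambda q, ctx: f"{q} given {ctx[-1]}",
    retrieve=lambda q: [f"doc for: {q}"],
)
print(docs)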
5 changes: 5 additions & 0 deletions adalflow/CHANGELOG.md
@@ -1,3 +1,8 @@
+## [0.2.4] - 2024-09-23
+
+### Improved
+- Better diagnose report for `Trainer.diagnose`.
+- Multi-hop RAG with handling of Cycle.
## [0.2.3] - 2024-09-20

### Rename
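
For orientation, the improved diagnose report is produced by running the trainer in diagnose mode over a dataset split. A hedged usage sketch; the import path and keyword names are assumptions based on the AdalFlow docs, not verified against this revision:

import adalflow as adal  # assumes Trainer is re-exported at the top level

trainer = adal.Trainer(adaltask=my_adal_component)  # my_adal_component: your AdalComponent subclass
trainer.diagnose(dataset=train_dataset, split="train")  # writes per-sample error reports for the split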
4 changes: 2 additions & 2 deletions adalflow/adalflow/core/__init__.py
@@ -3,7 +3,7 @@
from .component import Component, FunComponent, fun_to_component
from .container import Sequential
from .db import LocalDB
-from .default_prompt_template import DEFAULT_LIGHTRAG_SYSTEM_PROMPT
+from .default_prompt_template import DEFAULT_ADALFLOW_SYSTEM_PROMPT
from .embedder import Embedder, BatchEmbedder
from .generator import Generator, BackwardEngine
from .model_client import ModelClient
@@ -58,7 +58,7 @@
"Generator",
"BackwardEngine",
"Prompt",
"DEFAULT_LIGHTRAG_SYSTEM_PROMPT",
"DEFAULT_ADALFLOW_SYSTEM_PROMPT",
# "Parameter",
"required_field",
"ModelClient",
16 changes: 8 additions & 8 deletions adalflow/adalflow/core/default_prompt_template.py
@@ -4,13 +4,13 @@
"""

__all__ = [
"LIGHTRAG_DEFAULT_PROMPT_ARGS",
"LIGHTRAG_DEFAULT_PROMPT_TRAINABLE_PARAMS",
"SIMPLE_DEFAULT_LIGHTRAG_SYSTEM_PROMPT",
"DEFAULT_LIGHTRAG_SYSTEM_PROMPT",
"ADALFLOW_DEFAULT_PROMPT_ARGS",
"ADALFLOW_DEFAULT_PROMPT_TRAINABLE_PARAMS",
"SIMPLE_DEFAULT_ADALFLOW_SYSTEM_PROMPT",
"DEFAULT_ADALFLOW_SYSTEM_PROMPT",
]
# TODO: potentially make a data class for this
-LIGHTRAG_DEFAULT_PROMPT_ARGS = [
+ADALFLOW_DEFAULT_PROMPT_ARGS = [
"task_desc_str", # task description
"output_format_str", # output format of the task
"tools_str", # tools used in the task
@@ -21,17 +21,17 @@
"input_str", # user query or input
]

-LIGHTRAG_DEFAULT_PROMPT_TRAINABLE_PARAMS = [
+ADALFLOW_DEFAULT_PROMPT_TRAINABLE_PARAMS = [
"task_desc_str",
# "output_format_str",
"examples_str",
]

-SIMPLE_DEFAULT_LIGHTRAG_SYSTEM_PROMPT = r"""<SYS>{{task_desc_str}}</SYS>
+SIMPLE_DEFAULT_ADALFLOW_SYSTEM_PROMPT = r"""<SYS>{{task_desc_str}}</SYS>
User: {{input_str}}
You:"""

-DEFAULT_LIGHTRAG_SYSTEM_PROMPT = r"""<START_OF_SYSTEM_PROMPT>
+DEFAULT_ADALFLOW_SYSTEM_PROMPT = r"""<START_OF_SYSTEM_PROMPT>
{# task desc #}
{% if task_desc_str %}
{{task_desc_str}}
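
The renamed templates are plain Jinja2 strings whose sections render only when the corresponding variable is provided. A minimal sketch of that conditional behavior, using a stand-in template rather than the full DEFAULT_ADALFLOW_SYSTEM_PROMPT:

from jinja2 import Template

# Stand-in with the same conditional structure as the default system prompt.
tpl = Template(
    r"""<START_OF_SYSTEM_PROMPT>
{% if task_desc_str %}{{task_desc_str}}{% endif %}
<END_OF_SYSTEM_PROMPT>
User: {{input_str}}"""
)

print(tpl.render(task_desc_str="Answer concisely.", input_str="What is RAG?"))
# Omitting task_desc_str drops that section instead of rendering a blank placeholder.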
104 changes: 68 additions & 36 deletions adalflow/adalflow/core/generator.py
@@ -25,7 +25,7 @@
from adalflow.core.prompt_builder import Prompt
from adalflow.core.functional import compose_model_kwargs
from adalflow.core.model_client import ModelClient
-from adalflow.core.default_prompt_template import DEFAULT_LIGHTRAG_SYSTEM_PROMPT
+from adalflow.core.default_prompt_template import DEFAULT_ADALFLOW_SYSTEM_PROMPT
from adalflow.optim.function import BackwardContext
from adalflow.utils.cache import CachedEngine
from adalflow.tracing.callback_manager import CallbackManager
@@ -63,7 +63,7 @@ class Generator(GradComponent, CachedEngine, CallbackManager):
Args:
model_client (ModelClient): The model client to use for the generator.
model_kwargs (Dict[str, Any], optional): The model kwargs to pass to the model client. Defaults to {}. Please refer to :ref:`ModelClient<components-model_client>` for the details on how to set the model_kwargs for your specific model if it is from our library.
-template (Optional[str], optional): The template for the prompt. Defaults to :ref:`DEFAULT_LIGHTRAG_SYSTEM_PROMPT<core-default_prompt_template>`.
+template (Optional[str], optional): The template for the prompt. Defaults to :ref:`DEFAULT_ADALFLOW_SYSTEM_PROMPT<core-default_prompt_template>`.
prompt_kwargs (Optional[Dict], optional): The preset prompt kwargs to fill in the variables in the prompt. Defaults to None.
output_processors (Optional[Component], optional): The output processors after model call. It can be a single component or a chained component via ``Sequential``. Defaults to None.
trainable_params (Optional[List[str]], optional): The list of trainable parameters. Defaults to [].
@@ -77,7 +77,7 @@ class Generator(GradComponent, CachedEngine, CallbackManager):
model_client: ModelClient # for better type checking

_use_cache: bool = False
-_kwargs: Dict[str, Any] = {}
+# _kwargs: Dict[str, Any] = {}

def __init__(
self,
@@ -95,7 +95,7 @@ def __init__(
cache_path: Optional[str] = None,
use_cache: bool = False,
) -> None:
r"""The default prompt is set to the DEFAULT_LIGHTRAG_SYSTEM_PROMPT. It has the following variables:
r"""The default prompt is set to the DEFAULT_ADALFLOW_SYSTEM_PROMPT. It has the following variables:
- task_desc_str
- tools_str
- example_str
@@ -112,7 +112,7 @@ def __init__(
Got {model_client} instead."
)

-template = template or DEFAULT_LIGHTRAG_SYSTEM_PROMPT
+template = template or DEFAULT_ADALFLOW_SYSTEM_PROMPT

# Cache
model_str = (
@@ -146,22 +146,25 @@ def __init__(
# to support better testing on the parts beside of the model call
self.mock_output: bool = False
self.mock_output_data: str = "mock data"
-self.data_map_func: Callable = None
-self.set_data_map_func()
+# self.data_map_func: Callable = None
+# self.set_data_map_func()
self.model_str = model_str
self._use_cache = use_cache

-self._kwargs = {
-"model_client": model_client,
-"model_kwargs": model_kwargs,
-"template": template,
-"prompt_kwargs": prompt_kwargs,
-"output_processors": output_processors,
-"name": name,
-"cache_path": cache_path,
-"use_cache": use_cache,
-}
+# self._kwargs = {
+# "model_client": model_client,
+# "model_kwargs": model_kwargs,
+# "template": template,
+# "prompt_kwargs": prompt_kwargs,
+# "output_processors": output_processors,
+# "name": name,
+# "cache_path": cache_path,
+# "use_cache": use_cache,
+# }
self._teacher: Optional["Generator"] = None
+self._trace_api_kwargs: Dict[str, Any] = (
+{}
+) # used by dynamic computation graph and backpropagation

def get_cache_path(self) -> str:
r"""Get the cache path for the generator."""
@@ -392,17 +395,17 @@ def set_teacher_generator(self, teacher: "Generator" = None):
print(f"Teacher generator set: {self._teacher}, teacher {teacher}")
log.debug(f"Teacher generator set: {self._teacher}")

-def set_data_map_func(self, map_func: Callable = None):
-def default_map_func(data: "GeneratorOutputType") -> str:
-return (
-data.data
-if data.data
-else self.failure_message_to_backward_engine(data)
-)
+# def set_data_map_func(self, map_func: Callable = None):
+# def default_map_func(data: "GeneratorOutputType") -> str:
+# return (
+# data.data
+# if data.data
+# else self.failure_message_to_backward_engine(data)
+# )

-self.data_map_func = map_func or default_map_func
+# self.data_map_func = map_func or default_map_func

-log.debug(f"Data map function set: {self.data_map_func}")
+# log.debug(f"Data map function set: {self.data_map_func}")

# TODO: limit to only one demo parameter.
@staticmethod
@@ -414,14 +417,36 @@ def find_demo_parameter(prompt_kwargs: Dict) -> Optional[Parameter]:
return p
return None

+# NOTE: when training is true, forward will be called in __call__ instead of call
def forward(
self,
-prompt_kwargs: Optional[Dict] = {}, # the input need to be passed to the prompt
+prompt_kwargs: Optional[
+Dict[str, Union[str, Parameter]]
+] = {}, # the input need to be passed to the prompt
model_kwargs: Optional[Dict] = {},
id: Optional[str] = None,
) -> "Parameter":
-# 1. call the model
+# 1. convert prompt_kwargs to parameter if it is not
+for k, v in prompt_kwargs.items():
+if not isinstance(v, Parameter):
+prompt_kwargs[k] = Parameter(
+data=v,
+name=f"{self.name}_{k}",
+requires_opt=True,
+param_type=ParameterType.INPUT,
+)
+
+# 2. call the model
+unwrapped_prompt_kwargs: Dict[str, Any] = {}
+for k, v in prompt_kwargs.items():
+if isinstance(v, Parameter):
+unwrapped_prompt_kwargs[k] = v.map_to_successor(self)
+else:
+unwrapped_prompt_kwargs[k] = v
+
+print(
+f"unwrapped_prompt_kwargs: {unwrapped_prompt_kwargs}, model_kwargs: {model_kwargs}"
+)
+
output: GeneratorOutputType = None
input_args = {}
if self.mock_output:
@@ -430,34 +455,34 @@ def forward(
if self.teacher_mode and not isinstance(self, BackwardEngine):
if not self._teacher:
print(
f"prompt_kwargs: {prompt_kwargs}, model_kwargs: {model_kwargs}"
f"unwrapped_prompt_kwargs: {unwrapped_prompt_kwargs}, model_kwargs: {model_kwargs}"
)
print(f"names: {self.name}")
raise ValueError("Teacher generator is not set.")
log.info(f"Using teacher: {self._teacher}")
input_args = {
"prompt_kwargs": compose_model_kwargs(
-self._teacher.prompt_kwargs, prompt_kwargs
+self._teacher.prompt_kwargs, unwrapped_prompt_kwargs
),
"model_kwargs": compose_model_kwargs(
self._teacher.model_kwargs, model_kwargs
),
}
-output = self._teacher.call(prompt_kwargs, model_kwargs)
+output = self._teacher.call(**input_args, id=id)
else:
input_args = {
"prompt_kwargs": compose_model_kwargs(
-self.prompt_kwargs, prompt_kwargs
+self.prompt_kwargs, unwrapped_prompt_kwargs
),
"model_kwargs": compose_model_kwargs(
self.model_kwargs, model_kwargs
),
}
-output = self.call(prompt_kwargs, model_kwargs)
+output = self.call(**input_args, id=id)
# 2. Generate a Parameter object from the output
combined_prompt_kwargs = compose_model_kwargs(self.prompt_kwargs, prompt_kwargs)
-if self.data_map_func is None:
-self.set_data_map_func()
+# if self.data_map_func is None:
+# self.set_data_map_func()

predecessors = [
p for p in combined_prompt_kwargs.values() if isinstance(p, Parameter)
@@ -477,6 +502,8 @@
)
response.set_predecessors(predecessors)
response.trace_forward_pass(input_args=input_args, full_response=output)
+# *** special to the generator ***
+response.trace_api_kwargs(api_kwargs=self._trace_api_kwargs)
# attach the demo to the demo parameter
# if self.tracing:
demo_param = self.find_demo_parameter(combined_prompt_kwargs)
@@ -498,6 +525,8 @@
"No demo parameter found in the prompt_kwargs. You can not trace the demo data."
)

+# **** end of the special to the generator ****
+
if not self.backward_engine:
# self.set_backward_engine()
log.debug(f"Backward engine: {self.backward_engine}")
@@ -746,6 +775,7 @@ def call(
log.debug(f"model_kwargs: {model_kwargs}")

api_kwargs = self._pre_call(prompt_kwargs, model_kwargs)

log.debug(f"api_kwargs: {api_kwargs}")
output: GeneratorOutputType = None
# call the model client
@@ -779,6 +809,7 @@
)

log.info(f"output: {output}")
+self._trace_api_kwargs = api_kwargs # tracing
return output

# TODO: training is not supported in async call yet
Expand Down Expand Up @@ -824,6 +855,7 @@ async def acall(
prompt_kwargs=prompt_kwargs,
model_kwargs=model_kwargs,
)
+self._trace_api_kwargs = api_kwargs # tracing
return output

def __call__(self, *args, **kwargs) -> Union[GeneratorOutputType, Any]:
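
The forward changes above follow a wrap/unwrap discipline: raw prompt inputs are promoted to Parameter objects so the optimizer can record them as predecessors, then unwrapped back to plain values (via map_to_successor) before the model client is called. A self-contained sketch of the pattern with a stripped-down Parameter, not the AdalFlow class:

from typing import Any, Dict

class Parameter:
    """Stripped-down stand-in for adalflow.optim.parameter.Parameter."""
    def __init__(self, data: Any, name: str = "", requires_opt: bool = True):
        self.data, self.name, self.requires_opt = data, name, requires_opt

    def map_to_successor(self, successor: Any) -> Any:
        # The real class can return a successor-specific view of the data;
        # this stand-in just hands back the raw value.
        return self.data

def forward(prompt_kwargs: Dict[str, Any]) -> Dict[str, Any]:
    # 1. Wrap: promote raw values so the graph can track them as predecessors.
    for k, v in prompt_kwargs.items():
        if not isinstance(v, Parameter):
            prompt_kwargs[k] = Parameter(data=v, name=f"generator_{k}")
    # 2. Unwrap: the model client only ever sees plain values.
    return {k: v.map_to_successor(None) for k, v in prompt_kwargs.items()}

print(forward({"input_str": "What is RAG?"}))  # {'input_str': 'What is RAG?'}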
10 changes: 5 additions & 5 deletions adalflow/adalflow/core/prompt_builder.py
@@ -1,4 +1,4 @@
"""Class prompt builder for LightRAG system prompt."""
"""Class prompt builder for AdalFlow system prompt."""

from typing import Dict, Any, Optional, List, TypeVar
import logging
@@ -8,7 +8,7 @@


from adalflow.core.component import Component
-from adalflow.core.default_prompt_template import DEFAULT_LIGHTRAG_SYSTEM_PROMPT
+from adalflow.core.default_prompt_template import DEFAULT_ADALFLOW_SYSTEM_PROMPT
from adalflow.optim.parameter import Parameter


@@ -20,10 +20,10 @@
class Prompt(Component):
__doc__ = r"""Renders a text string(prompt) from a Jinja2 template string.

-In default, we use the :ref:`DEFAULT_LIGHTRAG_SYSTEM_PROMPT<core-default_prompt_template>` as the template.
+In default, we use the :ref:`DEFAULT_ADALFLOW_SYSTEM_PROMPT<core-default_prompt_template>` as the template.

Args:
-template (str, optional): The Jinja2 template string. Defaults to DEFAULT_LIGHTRAG_SYSTEM_PROMPT.
+template (str, optional): The Jinja2 template string. Defaults to DEFAULT_ADALFLOW_SYSTEM_PROMPT.
preset_prompt_kwargs (Optional[Dict], optional): The preset prompt kwargs to fill in the variables in the prompt. Defaults to {}.

Examples:
@@ -56,7 +56,7 @@ def __init__(
):
super().__init__()

-self.template = template or DEFAULT_LIGHTRAG_SYSTEM_PROMPT
+self.template = template or DEFAULT_ADALFLOW_SYSTEM_PROMPT
self.__create_jinja2_template()
self.prompt_variables: List[str] = []
for var in self._find_template_variables(self.template):
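
Prompt discovers its variables by parsing the template itself; jinja2 ships a helper that does exactly this. A sketch of the idea, not the class's actual _find_template_variables implementation:

from jinja2 import Environment, meta

def find_template_variables(template: str) -> set:
    """Return the undeclared variables a Jinja2 template expects."""
    env = Environment()
    ast = env.parse(template)
    return meta.find_undeclared_variables(ast)

print(find_template_variables("<SYS>{{task_desc_str}}</SYS> User: {{input_str}}"))
# -> {'task_desc_str', 'input_str'} (set order may vary)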
1 change: 1 addition & 0 deletions adalflow/adalflow/core/string_parser.py
@@ -214,6 +214,7 @@ def call(self, input: str) -> JSON_PARSER_OUTPUT_TYPE:
YAML_PARSER_OUTPUT_TYPE = JSON_PARSER_OUTPUT_TYPE


+# TODO: yaml parser needs to be more robust, currently json works way better than yaml
class YamlParser(Parser):
__doc__ = r"""To extract YAML strings from text and parse them into a YAML object.

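
The new TODO flags that YAML extraction is brittle next to JSON: models often wrap YAML in code fences or mix it with prose. A hedged fallback sketch, independent of the YamlParser class, that tries fenced or bare YAML first and falls back to the first JSON object:

import json
import re

import yaml  # PyYAML

def parse_structured(text: str) -> dict:
    """Try fenced (or bare) YAML first, then fall back to a JSON object."""
    fence = re.search(r"```(?:yaml)?\s*(.*?)```", text, re.DOTALL)
    candidate = fence.group(1) if fence else text
    try:
        data = yaml.safe_load(candidate)
        if isinstance(data, dict):
            return data
    except yaml.YAMLError:
        pass
    match = re.search(r"\{.*\}", text, re.DOTALL)  # outermost JSON-looking span
    if match:
        return json.loads(match.group(0))
    raise ValueError("no parsable YAML or JSON found")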
3 changes: 0 additions & 3 deletions adalflow/adalflow/optim/grad_component.py
@@ -57,9 +57,6 @@ def forward(self, *args, **kwargs) -> "Parameter":
1. for all args and kwargs, if it is a `Parameter` object, it will be tracked as `Predecessor`.
2. Trace input_args and full_response in the parameter object.
3. Return the parameter object.

-TODO: all Gradcomponent should not allow args but only kwargs.
-For now, just check if id is in kwargs.
"""

from adalflow.optim.parameter import Parameter
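
The contract in this docstring, where every Parameter among args and kwargs becomes a predecessor of the returned parameter, fits in a few lines. A simplified sketch, reusing a minimal Parameter stand-in rather than the AdalFlow class:

from typing import Any, List

class Parameter:  # minimal stand-in, as in the generator sketch above
    def __init__(self, data: Any):
        self.data = data

def collect_predecessors(*args: Any, **kwargs: Any) -> List[Parameter]:
    """Gather every Parameter passed in, positionally or by keyword."""
    return [v for v in (*args, *kwargs.values()) if isinstance(v, Parameter)]

p = Parameter("task description")
print(collect_predecessors(p, input_str="plain string"))  # only p qualifies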