diff --git a/adalflow/CHANGELOG.md b/adalflow/CHANGELOG.md index f87773ab..df1140ce 100644 --- a/adalflow/CHANGELOG.md +++ b/adalflow/CHANGELOG.md @@ -1,3 +1,9 @@ +## [0.2.7] - 2024-09-23 + +### Improved +- Better diagnose report for `Trainer.diagnose`. +- Multi-hop RAG with handling of Cycle. + ## [0.2.7] - TO Be Released ### Added - `Memory` is completed with `call` and `add_dialog_turn` methods. diff --git a/adalflow/adalflow/__init__.py b/adalflow/adalflow/__init__.py index fa4cd930..4c9b45ba 100644 --- a/adalflow/adalflow/__init__.py +++ b/adalflow/adalflow/__init__.py @@ -1,7 +1,7 @@ __version__ = "0.2.6" from adalflow.core.component import Component, fun_to_component -from adalflow.core.container import Sequential +from adalflow.core.container import Sequential, ComponentList from adalflow.core.base_data_class import DataClass, DataClassFormatType, required_field from adalflow.optim.grad_component import GradComponent @@ -63,6 +63,10 @@ BedrockAPIClient, ) +# data pipeline +from adalflow.components.data_process.text_splitter import TextSplitter +from adalflow.components.data_process.data_components import ToEmbeddings + __all__ = [ "Component", "fun_to_component", @@ -72,7 +76,10 @@ "required_field", # Container "Sequential", + "ComponentList", + # Grad Component "GradComponent", + # Functional Component "ModelClient", "Generator", "Embedder", @@ -99,6 +106,9 @@ "JsonOutputParser", "ListOutputParser", "DataClassParser", + # Data Pipeline + "TextSplitter", + "ToEmbeddings", # Types "GeneratorOutput", "EmbedderOutput", diff --git a/adalflow/adalflow/core/__init__.py b/adalflow/adalflow/core/__init__.py index 38472520..a4a67c6a 100644 --- a/adalflow/adalflow/core/__init__.py +++ b/adalflow/adalflow/core/__init__.py @@ -1,7 +1,7 @@ from .base_data_class import DataClass, required_field, DataClassFormatType from .component import Component, FunComponent, fun_to_component -from .container import Sequential +from .container import Sequential, ComponentList from .db import LocalDB from .default_prompt_template import DEFAULT_ADALFLOW_SYSTEM_PROMPT from .embedder import Embedder, BatchEmbedder @@ -50,6 +50,7 @@ "LocalDB", "Component", "Sequential", + "ComponentList", "FunComponent", "fun_to_component", "DataClass", diff --git a/adalflow/adalflow/core/base_data_class.py b/adalflow/adalflow/core/base_data_class.py index daac546d..1a379724 100644 --- a/adalflow/adalflow/core/base_data_class.py +++ b/adalflow/adalflow/core/base_data_class.py @@ -356,8 +356,6 @@ class TrecDataList(DataClass): return dict(ordered_dict) - return ordered_dict - @classmethod def from_dict(cls, data: Dict[str, Any]) -> "DataClass": """Create a dataclass instance from a dictionary. 
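Usage sketch (not part of the diff): one possible way to use the newly re-exported pieces from the package root. The TextSplitter keyword arguments are assumptions about its existing API, and the Upper component is a hypothetical stand-in.

    import adalflow as adal

    # Assumed TextSplitter arguments; adjust if the actual API differs.
    splitter = adal.TextSplitter(split_by="word", chunk_size=400, chunk_overlap=100)

    class Upper(adal.Component):
        def call(self, text: str) -> str:
            return text.upper()

    # ComponentList registers each sub-component (like torch.nn.ModuleList),
    # so they stay visible to all Component methods.
    stages = adal.ComponentList([Upper() for _ in range(3)])
    print(len(stages), stages[0]("hello"))
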
diff --git a/adalflow/adalflow/core/component.py b/adalflow/adalflow/core/component.py index 28bb794e..d0dd6631 100644 --- a/adalflow/adalflow/core/component.py +++ b/adalflow/adalflow/core/component.py @@ -167,6 +167,7 @@ def use_teacher(self, mode: bool = True): component.use_teacher(mode) return self + # TODO: reassese trace, it should be turned on maybe all the time def trace(self, mode: bool = True): r"""Sets the component in tracing mode.This signal will be used in forward and backward to accumulate input and output.""" if not isinstance(mode, bool): diff --git a/adalflow/adalflow/core/container.py b/adalflow/adalflow/core/container.py index bb2a1a54..a941adb1 100644 --- a/adalflow/adalflow/core/container.py +++ b/adalflow/adalflow/core/container.py @@ -1,14 +1,63 @@ -"""Container component for composing multiple components, such as Sequential.""" - -from collections import OrderedDict +""" +Container component for composing multiple components, such as Sequential +and ComponentList. + +This design draws inspiration from PyTorch’s modular +container patterns, including `nn.Sequential` and `nn.ModuleList`. The +`Container` component allows for grouping several components into one, enabling +flexible and reusable model architectures. + +Design Motivation: +------------------- +This implementation follows the same principles as PyTorch’s component-based +design, encouraging modularity, reusability, and extensibility. The `Container` +component provides an easy way to manage multiple layers or other components, +while ensuring that their parameters are properly registered and updated during +training. + +Credits: +--------- +The design of this component takes inspiration from the PyTorch project +(https://pytorch.org). PyTorch is an open-source deep learning framework, +licensed under a BSD-style license. Although this code is not part of the +official PyTorch library, it mirrors the same design principles. + +For more details on PyTorch’s licensing, refer to: +https://github.com/pytorch/pytorch/blob/main/LICENSE + +Usage Example: +-------------- + class MyModule(nn.Module): + def __init__(self): + super().__init__() + + self.model = nn.Sequential( + nn.Conv2d(1,20,5), + nn.ReLU(), + nn.Conv2d(20,64,5), + nn.ReLU() + ) + self.linears = nn.ModuleList([nn.Linear(10, 10) for i in range(10)]) + + def forward(self, x): + # ModuleList can act as an iterable, or be indexed using ints + for i, l in enumerate(self.linears): + x = self.linears[i // 2](x) + l(x) + return x + +""" + +from collections import OrderedDict, abc as container_abcs import operator -from itertools import islice -from typing import TypeVar, Dict, Union, Iterable, Iterator, Any, overload +from itertools import islice, chain +from typing import TypeVar, Dict, Union, Iterable, Iterator, Any, overload, Optional from adalflow.core.component import Component T = TypeVar("T", bound=Component) +__all__ = ["Sequential", "ComponentList"] + class Sequential(Component): __doc__ = r"""A sequential container. @@ -311,3 +360,177 @@ def extend(self, components: Iterable[Component]) -> "Sequential": for component in components: self.append(component) return self + + +def _addindent(s_: str, numSpaces: int): + s = s_.split("\n") + # don't do anything for single-line stuff + if len(s) == 1: + return s_ + first = s.pop(0) + s = [(numSpaces * " ") + line for line in s] + s = "\n".join(s) + s = first + "\n" + s + return s + + +class ComponentList(Component): + __doc__ = r"""Holds subcomponents in a list. 
+ + :class:`adalflow.core.ComponentList` can be indexed like a regular Python list, but + the components it holds are properly registered, and will be visible by all + :class:`adalflow.core.Component` methods. + + Args: + components (iterable, optional): an iterable of components to add + + Examples: + + .. code-block:: python + + # Example of how to use ComponentList + class MyComponents(Component): + def __init__(self): + super().__init__() + self.llms = ComponentList([adal.Generator() for i in range(10)]) + + def forward(self, x): + for layer in self.layers: + x = layer(x) + return x + """ + _components: Dict[str, Component] = OrderedDict() + + def __init__(self, components: Optional[Iterable[Component]] = None) -> None: + super().__init__() + if components is not None: + self += components + + def _get_abs_string_index(self, idx): + """Get the absolute index as a string.""" + idx = operator.index(idx) + if not (-len(self) <= idx < len(self)): + raise IndexError(f"index {idx} is out of range") + if idx < 0: + idx += len(self) + return str(idx) + + def __getitem__(self, idx: Union[int, slice]) -> Union[Component, "ComponentList"]: + """Retrieve a component or a slice of components.""" + if isinstance(idx, slice): + return self.__class__(list(self._components.values())[idx]) + else: + return self._components[self._get_abs_string_index(idx)] + + def __setitem__(self, idx: int, component: Component) -> None: + """Set a component at the given index.""" + idx = self._get_abs_string_index(idx) + return setattr(self, str(idx), component) + + def __delitem__(self, idx: Union[int, slice]) -> None: + """Delete a component or a slice of components.""" + if isinstance(idx, slice): + for k in range(len(self._components))[idx]: + delattr(self, str(k)) + else: + delattr(self, self._get_abs_string_index(idx)) + # To preserve numbering, self._components is being reconstructed with modules after deletion + str_indices = [str(i) for i in range(len(self._components))] + self._components = OrderedDict( + list(zip(str_indices, self._components.values())) + ) + + def __len__(self) -> int: + """Return the number of components.""" + return len(self._components) + + def __iter__(self) -> Iterator[Component]: + """Iterate over the components.""" + return iter(self._components.values()) + + def __iadd__(self, components: Iterable[Component]) -> "ComponentList": + """Add multiple components using the `+=` operator.""" + + return self.extend(components) + + def __add__(self, other: Iterable[Component]) -> "ComponentList": + """Concatenate two ComponentLists.""" + + combined = ComponentList() + for i, component in enumerate(chain(self, other)): + combined.add_component(str(i), component) + return combined + + def __repr__(self): + """Return a custom repr for ModuleList that compresses repeated module representations.""" + list_of_reprs = [repr(item) for item in self] + if len(list_of_reprs) == 0: + return self._get_name() + "()" + + start_end_indices = [[0, 0]] + repeated_blocks = [list_of_reprs[0]] + for i, r in enumerate(list_of_reprs[1:], 1): + if r == repeated_blocks[-1]: + start_end_indices[-1][1] += 1 + continue + + start_end_indices.append([i, i]) + repeated_blocks.append(r) + + lines = [] + main_str = self._get_name() + "(" + for (start_id, end_id), b in zip(start_end_indices, repeated_blocks): + local_repr = f"({start_id}): {b}" # default repr + + if start_id != end_id: + n = end_id - start_id + 1 + local_repr = f"({start_id}-{end_id}): {n} x {b}" + + local_repr = _addindent(local_repr, 2) + 
lines.append(local_repr) + + main_str += "\n " + "\n ".join(lines) + "\n" + main_str += ")" + return main_str + + def __dir__(self): + keys = super().__dir__() + keys = [key for key in keys if not key.isdigit()] + return keys + + def insert(self, index: int, component: Component) -> None: + """Insert a component at the specified index.""" + for i in range(len(self._components), index, -1): + self._components[str(i)] = self._components[str(i - 1)] + self._components[str(index)] = component + + def pop(self, index: Union[int, slice]) -> Component: + """Remove and return a component at the given index.""" + component = self[index] + del self[index] + return component + + def append(self, component: Component) -> "ComponentList": + """Append a component to the list.""" + # self._components[str(len(self))] = component + self.add_component(str(len(self)), component) + return self + + def extend(self, components: Iterable[Component]) -> "ComponentList": + """Extend the list by appending multiple components.""" + # for component in components: + # self.append(component) + # return self + + if not isinstance(components, container_abcs.Iterable): + raise TypeError( + "ModuleList.extend should be called with an " + "iterable, but got " + type(components).__name__ + ) + offset = len(self) + for i, component in enumerate(components): + self.add_component(str(offset + i), component) + return self + + +# TODO: need to do the same to ParameterList and ParameterDict, ModuleDict diff --git a/adalflow/adalflow/core/generator.py b/adalflow/adalflow/core/generator.py index dd0ff5f6..baedd8fb 100644 --- a/adalflow/adalflow/core/generator.py +++ b/adalflow/adalflow/core/generator.py @@ -4,6 +4,7 @@ import json import re +import os from pathlib import Path from typing import Any, Dict, Optional, Union, Callable, Tuple, List @@ -36,7 +37,7 @@ FEEDBACK_ENGINE_TEMPLATE, LLM_CONVERSATION_TEMPLATE, VARIABLE_AND_PEERS_INFO, - CONVERSATION_START_INSTRUCTION_BASE, + # CONVERSATION_START_INSTRUCTION_BASE, CONVERSATION_START_INSTRUCTION_CHAIN, OBJECTIVE_INSTRUCTION_BASE, OBJECTIVE_INSTRUCTION_CHAIN, @@ -47,6 +48,8 @@ log = logging.getLogger(__name__) +DEBUG_MODE = os.environ.get("DEBUG_MODE", False) + PromptArgType = Dict[str, Union[str, Parameter]] @@ -64,7 +67,7 @@ class Generator(GradComponent, CachedEngine, CallbackManager): Args: model_client (ModelClient): The model client to use for the generator. model_kwargs (Dict[str, Any], optional): The model kwargs to pass to the model client. Defaults to {}. Please refer to :ref:`ModelClient` for the details on how to set the model_kwargs for your specific model if it is from our library. - template (Optional[str], optional): The template for the prompt. Defaults to :ref:`DEFAULT_LIGHTRAG_SYSTEM_PROMPT`. + template (Optional[str], optional): The template for the prompt. Defaults to :ref:`DEFAULT_ADALFLOW_SYSTEM_PROMPT`. prompt_kwargs (Optional[Dict], optional): The preset prompt kwargs to fill in the variables in the prompt. Defaults to None. output_processors (Optional[Component], optional): The output processors after model call. It can be a single component or a chained component via ``Sequential``. Defaults to None. trainable_params (Optional[List[str]], optional): The list of trainable parameters. Defaults to []. 
@@ -78,7 +81,9 @@ class Generator(GradComponent, CachedEngine, CallbackManager): model_client: ModelClient # for better type checking _use_cache: bool = False - _kwargs: Dict[str, Any] = {} + _kwargs: Dict[str, Any] = ( + {} + ) # to create teacher generator from student TODO: might reaccess this def __init__( self, @@ -96,7 +101,7 @@ def __init__( cache_path: Optional[str] = None, use_cache: bool = False, ) -> None: - r"""The default prompt is set to the DEFAULT_LIGHTRAG_SYSTEM_PROMPT. It has the following variables: + r"""The default prompt is set to the DEFAULT_ADALFLOW_SYSTEM_PROMPT. It has the following variables: - task_desc_str - tools_str - example_str @@ -145,8 +150,8 @@ def __init__( # to support better testing on the parts beside of the model call self.mock_output: bool = False self.mock_output_data: str = "mock data" - self.data_map_func: Callable = None - self.set_data_map_func() + # self.data_map_func: Callable = None + # self.set_data_map_func() self._use_cache = use_cache self._kwargs = { @@ -160,6 +165,9 @@ def __init__( "use_cache": use_cache, } self._teacher: Optional["Generator"] = None + self._trace_api_kwargs: Dict[str, Any] = ( + {} + ) # used by dynamic computation graph and backpropagation def set_cache_path(self, cache_path: str, model_client: object, model: str): """Set the cache path for the generator.""" @@ -373,7 +381,7 @@ def create_demo_data_instance( from adalflow.core.base_data_class import DynamicDataClassFactory # map the input fields - demo_data = {"id": id} + demo_data = {"id": id, "score": None} # add score to trace the prediction score demo_data_class_output_mapping, output_fields = self._get_default_mapping( output ) @@ -409,17 +417,17 @@ def set_teacher_generator(self, teacher: "Generator" = None): print(f"Teacher generator set: {self._teacher}, teacher {teacher}") log.debug(f"Teacher generator set: {self._teacher}") - def set_data_map_func(self, map_func: Callable = None): - def default_map_func(data: "GeneratorOutputType") -> str: - return ( - data.data - if data.data - else self.failure_message_to_backward_engine(data) - ) + # def set_data_map_func(self, map_func: Callable = None): + # def default_map_func(data: "GeneratorOutputType") -> str: + # return ( + # data.data + # if data.data + # else self.failure_message_to_backward_engine(data) + # ) - self.data_map_func = map_func or default_map_func + # self.data_map_func = map_func or default_map_func - log.debug(f"Data map function set: {self.data_map_func}") + # log.debug(f"Data map function set: {self.data_map_func}") # TODO: limit to only one demo parameter. @staticmethod @@ -431,14 +439,41 @@ def find_demo_parameter(prompt_kwargs: Dict) -> Optional[Parameter]: return p return None - # NOTE: when training is true, forward will be called in __call__ instead of call def forward( self, - prompt_kwargs: Optional[Dict] = {}, # the input need to be passed to the prompt + prompt_kwargs: Optional[ + Dict[str, Union[str, Parameter]] + ] = {}, # the input need to be passed to the prompt model_kwargs: Optional[Dict] = {}, id: Optional[str] = None, ) -> "Parameter": - # 1. call the model + r"""Customized forward pass on top of the GradComponent forward method.""" + # 1. convert prompt_kwargs to parameter if it is not + for k, v in prompt_kwargs.items(): + if not isinstance(v, Parameter): + prompt_kwargs[k] = Parameter( + data=v, + name=f"{self.name}_{k}", + requires_opt=True, + param_type=ParameterType.INPUT, + data_id=id, + ) + + # 2. 
call the model + unwrapped_prompt_kwargs: Dict[str, Any] = {} + for k, v in prompt_kwargs.items(): + if isinstance(v, Parameter): + if v.param_type == ParameterType.INPUT: + v.data_id = id + unwrapped_prompt_kwargs[k] = v.map_to_successor(self) + else: + unwrapped_prompt_kwargs[k] = v + if DEBUG_MODE: + print( + f"unwrapped_prompt_kwargs: {unwrapped_prompt_kwargs}, model_kwargs: {model_kwargs}" + ) + print(f"prompt template: {self.template}") + output: GeneratorOutputType = None input_args = {} if self.mock_output: @@ -446,35 +481,36 @@ def forward( else: if self.teacher_mode and not isinstance(self, BackwardEngine): if not self._teacher: - print( - f"prompt_kwargs: {prompt_kwargs}, model_kwargs: {model_kwargs}" - ) - print(f"names: {self.name}") + if DEBUG_MODE: + print( + f"unwrapped_prompt_kwargs: {unwrapped_prompt_kwargs}, model_kwargs: {model_kwargs}" + ) + print(f"names: {self.name}") raise ValueError("Teacher generator is not set.") log.info(f"Using teacher: {self._teacher}") input_args = { "prompt_kwargs": compose_model_kwargs( - self._teacher.prompt_kwargs, prompt_kwargs + self._teacher.prompt_kwargs, unwrapped_prompt_kwargs ), "model_kwargs": compose_model_kwargs( self._teacher.model_kwargs, model_kwargs ), } - output = self._teacher.call(prompt_kwargs, model_kwargs) + output = self._teacher.call(**input_args, id=id) else: input_args = { "prompt_kwargs": compose_model_kwargs( - self.prompt_kwargs, prompt_kwargs + self.prompt_kwargs, unwrapped_prompt_kwargs ), "model_kwargs": compose_model_kwargs( self.model_kwargs, model_kwargs ), } - output = self.call(prompt_kwargs, model_kwargs) + output = self.call(**input_args, id=id) # 2. Generate a Parameter object from the output combined_prompt_kwargs = compose_model_kwargs(self.prompt_kwargs, prompt_kwargs) - if self.data_map_func is None: - self.set_data_map_func() + # if self.data_map_func is None: + # self.set_data_map_func() predecessors = [ p for p in combined_prompt_kwargs.values() if isinstance(p, Parameter) @@ -494,6 +530,8 @@ def forward( ) response.set_predecessors(predecessors) response.trace_forward_pass(input_args=input_args, full_response=output) + # *** special to the generator *** + response.trace_api_kwargs(api_kwargs=self._trace_api_kwargs) # attach the demo to the demo parameter # if self.tracing: demo_param = self.find_demo_parameter(combined_prompt_kwargs) @@ -509,12 +547,14 @@ def forward( output, id=id, ) - demo_param.add_to_trace(demo, is_teacher=self.teacher_mode) + demo_param.add_dataclass_to_trace(demo, is_teacher=self.teacher_mode) else: log.debug( "No demo parameter found in the prompt_kwargs. You can not trace the demo data." 
) + # **** end of the special to the generator **** + if not self.backward_engine: # self.set_backward_engine() log.debug(f"Backward engine: {self.backward_engine}") @@ -547,26 +587,26 @@ def backward( id: Optional[str] = None, # the id of the input ) -> Parameter: - log.info(f"Generator: Backward: {response}") + log.info(f"Generator: Backward: {response.name}") children_params = response.predecessors - is_chain = True + is_intermediate_node = True if response.get_gradient_and_context_text().strip() == "": log.info(f"Generator: Backward: No gradient found for {response}.") # backward score to the demo parameter for pred in children_params: - if pred.requires_opt: - pred.set_score(response._score) - log.debug( - f"backpropagate the score {response._score} to {pred.name}, is_teacher: {self.teacher_mode}" + # if pred.requires_opt: + pred.set_score(response._score) + log.debug( + f"backpropagate the score {response._score} to {pred.name}, is_teacher: {self.teacher_mode}" + ) + if pred.param_type == ParameterType.DEMOS: + # Accumulate the score to the demo + pred.add_score_to_trace( + trace_id=id, score=response._score, is_teacher=self.teacher_mode ) - if pred.param_type == ParameterType.DEMOS: - # Accumulate the score to the demo - pred.add_score_to_trace( - trace_id=id, score=response._score, is_teacher=self.teacher_mode - ) - log.debug(f"Pred: {pred.name}, traces: {pred._traces}") + log.debug(f"Pred: {pred.name}, traces: {pred._traces}") # 1.backward for text-gradients if backward_engine: @@ -587,7 +627,7 @@ def backward( template=template, backward_engine=backward_engine, prompt_str=prompt_str, - is_chain=is_chain, + is_intermediate_node=is_intermediate_node, ) else: log.debug("Backward engine is not set for the generator. No text gradient.") @@ -600,14 +640,17 @@ def _backward_through_one_predecessor( template: str, backward_engine: "BackwardEngine", prompt_str: str, - is_chain: bool = False, + is_intermediate_node: bool = False, ): + """Creating gradient/textual feedback for prompt type parameters.""" if not pred.requires_opt: log.debug( f"Generator: Skipping {pred} as it does not require optimization." 
) return - log.debug(f"Generator: Backward through {pred}, is_chain: {is_chain}") + log.debug( + f"Generator: Backward through {pred}, is_intermediate_node: {is_intermediate_node}" + ) if pred.check_if_already_computed_gradient_respect_to(response.id): log.debug( @@ -626,7 +669,8 @@ def _backward_through_one_predecessor( } conversation_prompt_kwargs = { - "variable_name": pred.name, + # "variable_name": pred.name, + # "variable_desc": pred.role_desc, "input_value": input_prompt_kwargs, "llm_output": response.data, } @@ -643,9 +687,9 @@ def _backward_through_one_predecessor( template=VARIABLE_AND_PEERS_INFO, )() - conv_ins_template = CONVERSATION_START_INSTRUCTION_BASE + conv_ins_template = None # CONVERSATION_START_INSTRUCTION_BASE obj_ins_template = OBJECTIVE_INSTRUCTION_BASE - if is_chain: + if is_intermediate_node: # TODO: this will always be true conv_ins_template = CONVERSATION_START_INSTRUCTION_CHAIN obj_ins_template = OBJECTIVE_INSTRUCTION_CHAIN @@ -661,7 +705,9 @@ def _backward_through_one_predecessor( template=obj_ins_template, prompt_kwargs={ "response_desc": response.role_desc, - "response_gradient": response.get_gradient_and_context_text(), + "response_gradient": response.get_gradient_and_context_text( + skip_correct_sample=True + ), "instruction_to_backward_engine": pred.instruction_to_backward_engine, }, )() @@ -673,11 +719,16 @@ def _backward_through_one_predecessor( gradient_output: GeneratorOutput = None if response._score is not None and float(response._score) > 0.9: log.debug(f"EvalFnToTextLoss: Skipping {pred} as the score is high enough.") - manual_response = f"You get a high score: {response._score}." + # TODO: plus score descriptions + manual_response = f"You get score: {response._score}." gradient_output = GeneratorOutput( data=manual_response, raw_response=manual_response ) else: + # manual_response = f"You get score: {response._score}." + # gradient_output = GeneratorOutput( + # data=manual_response, raw_response=manual_response + # ) gradient_output: GeneratorOutput = backward_engine( prompt_kwargs=backward_engine_prompt_kwargs @@ -711,7 +762,7 @@ def _backward_through_one_predecessor( pred.gradients_context[var_gradient] = GradientContext( context=conversation_str, response_desc=response.role_desc, - variable_desc=pred.role_desc, + variable_desc=pred.role_desc, # parameter_desc ) def _run_callbacks( @@ -763,6 +814,7 @@ def call( log.debug(f"model_kwargs: {model_kwargs}") api_kwargs = self._pre_call(prompt_kwargs, model_kwargs) + log.debug(f"api_kwargs: {api_kwargs}") output: GeneratorOutputType = None # call the model client @@ -796,6 +848,7 @@ def call( ) log.info(f"output: {output}") + self._trace_api_kwargs = api_kwargs # tracing return output # TODO: training is not supported in async call yet @@ -841,6 +894,7 @@ async def acall( prompt_kwargs=prompt_kwargs, model_kwargs=model_kwargs, ) + self._trace_api_kwargs = api_kwargs # tracing return output def __call__(self, *args, **kwargs) -> Union[GeneratorOutputType, Any]: @@ -880,6 +934,10 @@ def failure_message_to_backward_engine( return response_value +from adalflow.tracing.decorators import trace_generator_states + + +@trace_generator_states() class BackwardEngine(Generator): # it is a generator with defaule template __doc__ = """The backward engine is a Generator with a default template for the backward pass. 
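Usage sketch (assumptions noted inline): exercising the reworked training-mode forward pass. The model client, model name, and template below are placeholders; only the id propagation and the DEBUG_MODE flag come from this diff.

    import os

    # DEBUG_MODE is read at import time in generator.py, so set it before importing adalflow.
    os.environ["DEBUG_MODE"] = "1"

    import adalflow as adal
    from adalflow.components.model_client import OpenAIClient  # placeholder; any ModelClient should work

    gen = adal.Generator(
        model_client=OpenAIClient(),
        model_kwargs={"model": "gpt-3.5-turbo"},
        template=r"{{task_desc_str}} Question: {{question}}",
        prompt_kwargs={"task_desc_str": "Answer in one word."},
    )

    gen.train()  # training mode: __call__ dispatches to forward and returns a Parameter
    pred = gen(prompt_kwargs={"question": "What is the capital of France?"}, id="sample-1")
    # Plain values in prompt_kwargs are wrapped as INPUT Parameters carrying data_id="sample-1".
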
@@ -890,10 +948,18 @@ def __init__(self, **kwargs): if kwargs is None: kwargs = {} kwargs["template"] = FEEDBACK_ENGINE_TEMPLATE + super().__init__(**kwargs) self.name = "BackwardEngine" self.teacher_mode = False + def call(self, **kwargs) -> GeneratorOutputType: + r"""Catch the rate limit error and raise it.""" + output = super().call(**kwargs) + if output and output.error is not None and "429" in output.error: + raise ValueError(f"Error in the backward engine: {output.error}") + return output + @staticmethod def failure_message_to_optimizer( gradient_response: GeneratorOutput, diff --git a/adalflow/adalflow/core/prompt_builder.py b/adalflow/adalflow/core/prompt_builder.py index eca45557..0d998b63 100644 --- a/adalflow/adalflow/core/prompt_builder.py +++ b/adalflow/adalflow/core/prompt_builder.py @@ -1,4 +1,4 @@ -"""Class prompt builder for LightRAG system prompt.""" +"""Class prompt builder for AdalFlow system prompt.""" from typing import Dict, Any, Optional, List, TypeVar import logging @@ -20,10 +20,10 @@ class Prompt(Component): __doc__ = r"""Renders a text string(prompt) from a Jinja2 template string. - In default, we use the :ref:`DEFAULT_LIGHTRAG_SYSTEM_PROMPT` as the template. + In default, we use the :ref:`DEFAULT_ADALFLOW_SYSTEM_PROMPT` as the template. Args: - template (str, optional): The Jinja2 template string. Defaults to DEFAULT_LIGHTRAG_SYSTEM_PROMPT. + template (str, optional): The Jinja2 template string. Defaults to DEFAULT_ADALFLOW_SYSTEM_PROMPT. preset_prompt_kwargs (Optional[Dict], optional): The preset prompt kwargs to fill in the variables in the prompt. Defaults to {}. Examples: diff --git a/adalflow/adalflow/core/retriever.py b/adalflow/adalflow/core/retriever.py index bcde901f..fb65a298 100644 --- a/adalflow/adalflow/core/retriever.py +++ b/adalflow/adalflow/core/retriever.py @@ -83,6 +83,7 @@ def call( self, input: RetrieverQueriesType, top_k: Optional[int] = None, + id: str = None, # for tracing, diagnosing, and training **kwargs, ) -> RetrieverOutputType: raise NotImplementedError("retrieve is not implemented") @@ -91,6 +92,7 @@ async def acall( self, input: RetrieverQueriesType, top_k: Optional[int] = None, + id: str = None, # for tracing, diagnosing, and training **kwargs, ) -> RetrieverOutputType: raise NotImplementedError("Async retrieve is not implemented") @@ -102,6 +104,7 @@ def forward( top_k: Optional[ int ] = None, # TODO: top_k can be trained in the future if its formulated as a parameter + id: str = None, # for tracing, diagnosing, and training **kwargs, ) -> Parameter: r"""Customized forward on top of the GradComponent forward method. @@ -123,6 +126,8 @@ def forward( requires_opt=True, param_type=ParameterType.HYPERPARAM, ) + if input is None: + raise ValueError("Input cannot be empty") response = super().forward(input, top_k=top_k, **kwargs) response.param_type = ( ParameterType.RETRIEVER_OUTPUT @@ -135,6 +140,24 @@ def backward( id: Optional[str] = None, backward_engine: Optional["Generator"] = None, ): - r"""Backward the response to pass the score to predecessors""" - log.info(f"Retriever backward: {response}") - pass + r"""Backward the response to pass the score to predecessors. 
+ Function as a relay component""" + log.info(f"Retriever backward: {response.name}") + children_params = response.predecessors + + # is_chain = True + if response.get_gradient_and_context_text().strip() == "": + log.info(f"Generator: Backward: No gradient found for {response}.") + + for pred in children_params: + pred.set_score(response._score) + from adalflow.utils.logger import printc + + printc( + f"Retriever: Backward: {pred.name} set_score: {response._score}, {response.name}", + "blue", + ) + if pred.param_type == ParameterType.DEMOS: + pred.add_score_to_trace( + trace_id=id, score=response._score, is_teacher=self.teacher_mode + ) diff --git a/adalflow/adalflow/core/string_parser.py b/adalflow/adalflow/core/string_parser.py index 246ec176..3001b512 100644 --- a/adalflow/adalflow/core/string_parser.py +++ b/adalflow/adalflow/core/string_parser.py @@ -214,6 +214,7 @@ def call(self, input: str) -> JSON_PARSER_OUTPUT_TYPE: YAML_PARSER_OUTPUT_TYPE = JSON_PARSER_OUTPUT_TYPE +# TODO: yaml parser needs to be more robust, currently json works way better than yaml class YamlParser(Parser): __doc__ = r"""To extract YAML strings from text and parse them into a YAML object. diff --git a/adalflow/adalflow/eval/answer_match_acc.py b/adalflow/adalflow/eval/answer_match_acc.py index 7a9fa8f7..b45e61c1 100644 --- a/adalflow/adalflow/eval/answer_match_acc.py +++ b/adalflow/adalflow/eval/answer_match_acc.py @@ -29,8 +29,27 @@ class AnswerMatchAcc(BaseEvaluator): [1.0, 1.0, 1.0] """ - def __init__(self, type: Literal["exact_match", "fuzzy_match"] = "exact_match"): + def __init__( + self, + type: Literal[ + "exact_match", "fuzzy_match", "rouge_score", "bleu_score", "bert_score" + ] = "exact_match", + ): self.type = type + if self.type == "bert_score": + from torchmetrics.text.bert import BERTScore + + self.bertscore = BERTScore() + + elif self.type == "rouge_score": + from torchmetrics.text.rouge import ROUGEScore + + self.rougescore = ROUGEScore() + + elif self.type == "bleu_score": + from torchmetrics.text.bleu import BLEUScore + + self.bleuscore = BLEUScore() def compute_single_item( self, @@ -67,6 +86,37 @@ def compute_single_item( y = y.lower() y_gt = y_gt.lower() return 1.0 if y_gt in y else 0.0 + elif self.type == "bert_score": + from torchmetrics.text.bert import BERTScore + + self.bertscore = BERTScore() + score = self.bertscore([y], [y_gt]) + # get the data from the tensor + print(f"y: {[y]}, y_gt: {[y_gt]}, type: {type(y)}, type_gt: {type(y_gt)}") + print(score) + single_score = score["precision"].item() + return single_score + elif self.type == "rouge_score": + from torchmetrics.text.rouge import ROUGEScore + + self.rougescore = ROUGEScore() + score = self.rougescore([y], [y_gt]) + # get the data from the tensor + print(f"y: {[y]}, y_gt: {[y_gt]}, type: {type(y)}, type_gt: {type(y_gt)}") + print(score) + single_score = score["rouge1_precision"].item() + return single_score + elif self.type == "bleu_score": + from torchmetrics.text.bleu import BLEUScore + + self.bleuscore = BLEUScore() + score = self.bleuscore([y], [y_gt]) + # get the data from the tensor + print(f"y: {[y]}, y_gt: {[y_gt]}, type: {type(y)}, type_gt: {type(y_gt)}") + print(score) + single_score = score.item() + return single_score + else: raise NotImplementedError diff --git a/adalflow/adalflow/optim/few_shot/bootstrap_optimizer.py b/adalflow/adalflow/optim/few_shot/bootstrap_optimizer.py index c61c3649..a78c1ec6 100644 --- a/adalflow/adalflow/optim/few_shot/bootstrap_optimizer.py +++ 
b/adalflow/adalflow/optim/few_shot/bootstrap_optimizer.py @@ -52,7 +52,9 @@ def __init__( for param in params if param.requires_opt and param.param_type == ParameterType.DEMOS ] - log.info(f"BootstrapFewShot: {self.params}") + log.info(f"BootstrapFewShot: {[p.name for p in self.params]}") + + print(f"BootstrapFewShot: {[p.name for p in self.params]}") self._raw_shots = raw_shots self._bootstrap_shots = bootstrap_shots @@ -64,7 +66,11 @@ def __init__( exclude_input_fields_from_bootstrap_demos ) + # TODO: use the scores from the backward engine (optionally) on the demo parameters + # needs to make a decision on which + # this score does not make sense for multiple demo parameters def add_scores(self, ids: List[str], scores: List[float], is_teacher: bool = True): + r"""Add scores for each demo via _teacher_scores or _student_scores.""" if len(ids) != len(scores): raise ValueError( f"ids and scores must have the same length, got ids: {ids}, scores: {scores}" @@ -114,10 +120,16 @@ def sample( weighted: bool = True, ): r"""Performs weighted sampling, ensure the score is in range [0, 1]. The higher score means better accuracy.""" - # 1. sample from augmented demos + # 1. sample from augmented demos (from teacher) # set weights to be score # add 1 to all score to avoid negative weights augmented_options = list(augmented_demos.values()) + + # get the teacher scores length and the augmented demos length + len_teacher_scores = len(self._teacher_scores) + len_augmented_options = len(augmented_options) + print(f"len_teacher_scores: {len_teacher_scores}") + print(f"len_augmented_options: {len_augmented_options}") weights = None if weighted: weights: List[float] = [] @@ -223,6 +235,11 @@ def propose(self): if demo_param.requires_opt: augmented_demos = demo_param._traces demos = demo_param._student_traces + + if len(augmented_demos) != len(demos): + log.warning( + f"augmented and raw demos must have the same length, got {len(augmented_demos)} and {len(demos)} \n {augmented_demos} \n and student demos {demos}" + ) try: sampled_augmented_demos, sampled_raw_demos = self.sample( augmented_demos=augmented_demos, diff --git a/adalflow/adalflow/optim/function.py b/adalflow/adalflow/optim/function.py index 6391b68e..314fd124 100644 --- a/adalflow/adalflow/optim/function.py +++ b/adalflow/adalflow/optim/function.py @@ -1,3 +1,5 @@ +"""Inspired by TextGrad: Automatic differentiation via "text" """ + from typing import TYPE_CHECKING, Callable diff --git a/adalflow/adalflow/optim/grad_component.py b/adalflow/adalflow/optim/grad_component.py index 016c08db..b73e536e 100644 --- a/adalflow/adalflow/optim/grad_component.py +++ b/adalflow/adalflow/optim/grad_component.py @@ -57,9 +57,6 @@ def forward(self, *args, **kwargs) -> "Parameter": 1. for all args and kwargs, if it is a `Parameter` object, it will be tracked as `Predecessor`. 2. Trace input_args and full_response in the parameter object. 3. Return the parameter object. - - TODO: all Gradcomponent should not allow args but only kwargs. - For now, just check if id is in kwargs. """ from adalflow.optim.parameter import Parameter @@ -85,9 +82,13 @@ def forward(self, *args, **kwargs) -> "Parameter": for v in input_args.values(): if isinstance(v, Parameter): predecessors.append(v) + if v.param_type == ParameterType.INPUT: + v.data_id = kwargs.get("id", None) for v in kwargs.values(): if isinstance(v, Parameter): predecessors.append(v) + if v.param_type == ParameterType.INPUT: + v.data_id = kwargs.get("id", None) # 2. 
unwrap the parameter object to take only the data, successor_map_fn: lambda x: x.data in default # unwrap args @@ -133,6 +134,28 @@ def forward(self, *args, **kwargs) -> "Parameter": ) return response - def backward(self, *args, **kwargs): - pass - # raise NotImplementedError("backward method is not implemented") + def backward(self, *, response: "Parameter", id: str = None, **kwargs): + """Backward pass of the function. In default, it will pass all the scores to the predecessors. + + Note: backward is mainly used internally and better to only allow kwargs as the input. + + Subclass should implement this method if you need additional backward logic. + """ + log.info(f"GradComponent backward: {response.name}") + children_params = response.predecessors + + if response.get_gradient_and_context_text().strip() == "": + log.info(f"Generator: Backward: No gradient found for {response}.") + + for pred in children_params: + pred.set_score(response._score) + from adalflow.utils.logger import printc + + printc( + f"Retriever: Backward: {pred.name} set_score: {response._score}, {response.name}", + "blue", + ) + if pred.param_type == ParameterType.DEMOS: + pred.add_score_to_trace( + trace_id=id, score=response._score, is_teacher=self.teacher_mode + ) diff --git a/adalflow/adalflow/optim/loss_component.py b/adalflow/adalflow/optim/loss_component.py index 93520de4..e53ac609 100644 --- a/adalflow/adalflow/optim/loss_component.py +++ b/adalflow/adalflow/optim/loss_component.py @@ -10,7 +10,11 @@ class LossComponent(Component): - __doc__ = """A base class to define interfaces for an auto-grad component/operator. + __doc__ = """A base class to define a loss component. + + Loss component is to compute the textual gradients/feedback for each of its predecessors using another LLM as the backward engine. + + Each precessor should have basic information that is passed to its next component to inform its type such as retriever or generator and its role description. Compared with `Component`, `GradComponent` defines three important interfaces: - `forward`: the forward pass of the function, returns a `Parameter` object that can be traced and backpropagated. 
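Sketch of the contract behind the new default backward (an assumed illustration, not taken verbatim from the diff): a GradComponent subclass only implements call; the inherited forward wraps inputs and outputs in Parameters, and the default backward relays the downstream score to every predecessor, including demo traces for DEMOS parameters.

    from adalflow.optim.grad_component import GradComponent

    class UpperCase(GradComponent):
        """Hypothetical toy component; forward/backward behaviour is inherited from GradComponent."""

        def call(self, input: str, id: str = None) -> str:
            # `id` is the sample id used for tracing and diagnosing, mirroring Retriever.call above.
            return input.upper()

    comp = UpperCase()
    comp.train()                        # training mode: __call__ routes to forward
    out = comp(input="hello", id="q1")  # a Parameter wrapping "HELLO"
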
diff --git a/adalflow/adalflow/optim/optimizer.py b/adalflow/adalflow/optim/optimizer.py index c6fad814..943e04e5 100644 --- a/adalflow/adalflow/optim/optimizer.py +++ b/adalflow/adalflow/optim/optimizer.py @@ -1,12 +1,12 @@ """Base Classes for AdalFlow Optimizers, including Optimizer, TextOptimizer, and DemoOptimizer.""" -from typing_extensions import TypeAlias -from typing import Dict, Any, Union, Iterable, Sequence +from typing import Dict, Any, Union, Iterable, Sequence, List from adalflow.optim.parameter import Parameter from adalflow.core.base_data_class import DataClass -ParamsT: TypeAlias = Union[Iterable[Parameter], Iterable[Dict[str, Any]]] + +ParamsT = Union[Iterable[Parameter], Iterable[Dict[str, Any]]] class Optimizer: @@ -85,3 +85,7 @@ def config_shots(self, *args, **kwargs): def set_dataset(self, dataset: Sequence[DataClass]): r"""Set the dataset for the optimizer.""" self.dataset = dataset + + def add_scores(self, ids: List[str], scores: List[float], *args, **kwargs): + r"""Add scores to the optimizer.""" + raise NotImplementedError("add_scores method is not implemented") diff --git a/adalflow/adalflow/optim/parameter.py b/adalflow/adalflow/optim/parameter.py index 85fe25ea..5b60995c 100644 --- a/adalflow/adalflow/optim/parameter.py +++ b/adalflow/adalflow/optim/parameter.py @@ -11,14 +11,19 @@ Optional, Literal, Callable, + TYPE_CHECKING, ) +from pyvis.network import Network from collections import defaultdict import logging +import os from dataclasses import dataclass, field import uuid from adalflow.optim.types import ParameterType from adalflow.core.base_data_class import DataClass +if TYPE_CHECKING: + from adalflow.optim.text_grad.tgd_optimizer import TGDData, TGDOptimizerTrace T = TypeVar("T") # covariant set to False to allow for in-place updates @@ -41,7 +46,39 @@ class GradientContext: ) +@dataclass +class ComponentTrace: + input_args: Dict[str, Any] = field( + metadata={"desc": "The input arguments of the GradComponent forward"}, + default=None, + ) + full_response: object = field( + metadata={"desc": "The full response of the GradComponent output"}, default=None + ) + api_kwargs: Dict[str, Any] = field( + metadata={ + "desc": "The api_kwargs for components like Generator and Retriever that pass to the model client" + }, + default=None, + ) + + +# TODO: use this to better trace the score +@dataclass +class ScoreTrace: + score: float = field(metadata={"desc": "The score of the data point"}, default=None) + eval_comp_id: str = field( + metadata={"desc": "The id of the evaluation component"}, default=None + ) + eval_comp_name: str = field( + metadata={"desc": "The name of the evaluation component"}, default=None + ) + + COMBINED_GRADIENTS_TEMPLATE = r""" +{% if combined_gradients %} +Batch size: {{ combined_gradients|length }} +{% endif %} {% for g in combined_gradients %} {% set gradient = g[0] %} {% set gradient_context = g[1] %} @@ -94,6 +131,9 @@ class Parameter(Generic[T]): name: str = None # Name of the parameter, easier to read for humans role_desc: str = "" # Description of the role of the parameter data: T = None # Data of the parameter + data_id: str = ( + None # Id of the data from the training set, used only for input_type + ) param_type: ParameterType proposing: bool = False # State of the parameter @@ -113,11 +153,15 @@ class Parameter(Generic[T]): False # Disable the backward engine for the parameter ) + component_trace: ComponentTrace = None # Trace of the component + tgd_optimizer_trace: "TGDOptimizerTrace" = None # Trace of the TGD optimizer + def 
__init__( self, *, id: Optional[str] = None, data: T = None, # for generator output, the data will be set up as raw_response + data_id: str = None, # for tracing the data item in the training/val/test set requires_opt: bool = True, role_desc: str = "", param_type: ParameterType = ParameterType.NONE, @@ -132,6 +176,7 @@ def __init__( successor_map_fn: Optional[Dict[str, Callable]] = None, ): self.id = id or str(uuid.uuid4()) + self.data_id = data_id self.name = name self.role_desc = role_desc @@ -166,10 +211,13 @@ def __init__( self.instruction_to_backward_engine: str = instruction_to_backward_engine # here are used for demo parameter, filled by generator.forward - self._traces: Dict[str, DataClass] = {} # id of the data points - self._score: float = score # end to end evaluation score - + self._traces: Dict[str, DataClass] = {} # id to data items (DynamicDataClass) self._student_traces: Dict[str, DataClass] = {} # id + + self._score: float = ( + score # end to end evaluation score, TODO: might have multiple scores if using multiple eval fns # score is set in the gradients in the backward pass + ) + self._demos: List[DataClass] = ( [] ) # used for the optimizer to save the proposed demos @@ -178,6 +226,7 @@ def __init__( self.from_response_id = from_response_id # for gradient parameter self.successor_map_fn = successor_map_fn or {} + self.component_trace = ComponentTrace() def map_to_successor(self, successor: object) -> T: """Apply the map function to the successor based on the successor's id.""" @@ -238,19 +287,48 @@ def set_peers(self, peers: List["Parameter"] = None): ) self.peers = set(peers) + ############################################################################################################# + # Trace the tgd optimizer data + ############################################################################################################ + def trace_optimizer(self, api_kwargs: Dict[str, Any], response: "TGDData"): + from adalflow.optim.text_grad.tgd_optimizer import TGDOptimizerTrace + + self.tgd_optimizer_trace = TGDOptimizerTrace( + api_kwargs=api_kwargs, output=response + ) + + ############################################################################################################ + # Trace component, include trace_forward_pass & trace_api_kwargs for now + ############################################################################################################ def trace_forward_pass(self, input_args: Dict[str, Any], full_response: object): r"""Trace the forward pass of the parameter.""" self.input_args = input_args self.full_response = full_response + # TODO: remove the input_args and full_response to use component_trace + self.component_trace.input_args = input_args + self.component_trace.full_response = full_response + + def trace_api_kwargs(self, api_kwargs: Dict[str, Any]): + r"""Trace the api_kwargs for components like Generator and Retriever that pass to the model client.""" + self.component_trace.api_kwargs = api_kwargs def set_eval_fn_input(self, eval_input: object): r"""Set the input for the eval_fn.""" self.eval_input = eval_input + ################################################################################################################### + # Used for demo optimizer (forward and backward pass) to accumlate the traces on both score and DynamicDataClass + ################################################################################################################### def set_score(self, score: float): + r"""Set the score of the parameter in the 
backward pass + For intermediate nodes, there is only one score per each eval fn behind this node. + For leaf nodes, like DEMO or PROMPT, it will have [batch_size] of scores. + + But this score is only used to relay the score to the demo parametr. + """ self._score = score - def add_to_trace(self, trace: DataClass, is_teacher: bool = True): + def add_dataclass_to_trace(self, trace: DataClass, is_teacher: bool = True): r"""Called by the generator.forward to add a trace to the parameter. It is important to allow updating to the trace, as this will give different sampling weight. @@ -273,7 +351,12 @@ def add_score_to_trace(self, trace_id: str, score: float, is_teacher: bool = Tru raise ValueError( f"Trace with id {trace_id} does not exist. Current traces: {target.keys()}" ) - target[trace_id].score = score + + setattr(target[trace_id], "score", score) + + from adalflow.utils.logger import printc + + printc(f"Adding score {score} to trace {trace_id}", "magenta") ############################################################################################################ # Used for optimizer to propose new data @@ -349,10 +432,13 @@ def get_gradients_names(self) -> str: names = ", ".join(names) return names - def get_gradient_and_context_text(self) -> str: + def get_gradient_and_context_text(self, skip_correct_sample: bool = False) -> str: """Aggregates and returns: 1. the gradients 2. the context text for which the gradients are computed + + Sort the gradients from the lowest score to the highest score. + Highlight the gradients with the lowest score to the optimizer. """ from adalflow.core.prompt_builder import Prompt @@ -362,13 +448,26 @@ def get_gradient_and_context_text(self) -> str: # sore gradients by the _score from low to high self.gradients = sorted( - self.gradients, key=lambda x: x._score if x._score else 1 + self.gradients, key=lambda x: x._score if x._score is not None else 1 ) - - gradient_context_combined = zip( - self.gradients, - [self.gradients_context[g] for g in self.gradients], + # print the score for the sorted gradients + lowest_score_gradients = [] + for i, g in enumerate(self.gradients): + if skip_correct_sample: + if g._score > 0.5: + continue + lowest_score_gradients.append(g) + print(f"{i} Score: {g._score} for {g.name}, {type(g._score)}") + + gradient_context_combined = list( + zip( + lowest_score_gradients, + [self.gradients_context[g] for g in lowest_score_gradients], + ) ) + # set all gradients value to None + # for g in self.gradients: + # g.data = None gradient_context_combined_str = Prompt( template=COMBINED_GRADIENTS_TEMPLATE, @@ -416,9 +515,28 @@ def build_graph(node: "Parameter"): build_graph(root) return nodes, edges + def report_cycle(cycle_nodes: List["Parameter"]): + """ + Report the detected cycle and provide guidance to the user on how to avoid it. + """ + cycle_names = [node.name for node in cycle_nodes] + log.warning(f"Cycle detected: {' -> '.join(cycle_names)}") + print(f"Cycle detected in the graph: {' -> '.join(cycle_names)}") + + # Provide guidance on how to avoid the cycle + print("To avoid the cycle, consider the following strategies:") + print("- Modify the graph structure to remove cyclic dependencies.") + print( + "- Check the relationships between these nodes to ensure no feedback loops." + ) + def backward( self, - ): # engine should be the llm or customized backwards function to pass feedback + ): + """ + Apply backward pass for for all nodes in the graph by reversing the topological order. 
+ """ + # engine should be the llm or customized backwards function to pass feedback # topological sort of all the predecessors of the current parameter in the graph log.debug(f"Backward pass for {self.data}, backward function: {self.grad_fn}") @@ -441,19 +559,166 @@ def build_topo(node: Parameter): if not node.requires_opt: log.debug(f"Skipping {node.name} as it does not require optimization") continue - node.gradients = _check_and_reduce_gradients(node) log.debug(f"v: {node.data}, grad_fn: {node.grad_fn}, {node.get_grad_fn()}") if node.get_grad_fn() is not None: # gradient function takes in the engine log.debug(f"Calling gradient function for {node.name}") node.grad_fn() + # def backward( + # self, + # ): # engine should be the llm or customized backwards function to pass feedback + + # # topological sort of all the predecessors of the current parameter in the graph + # log.debug(f"Backward pass for {self.data}, backward function: {self.grad_fn}") + # topo: List[Parameter] = [] + # visited = set() + # in_stack = set() # Nodes currently being visited to detect cycles + # cycle_detected = False # Flag to check if any cycle was detected + + # def build_topo(node: Parameter, stack: Set[Parameter] = set()): + # nonlocal cycle_detected + + # if stack is None: + # stack = [] + + # # If the node is already in the stack, we have detected a cycle + # if node in in_stack: + # cycle_detected = True + # cycle_nodes = stack + [node] # The cycle includes the current path + # self.report_cycle(cycle_nodes) + # return False # Stop further processing due to cycle + # if node in visited: + # return + # visited.add(node) + # in_stack.add(node) + # stack.append(node) + # for pred in node.predecessors: + # build_topo(pred) + # topo.append(node) + # stack.pop() # Backtrack, remove the node from the current path + + # in_stack.remove(node) # Remove from the stack after processing + # return True + + # # build_topo(self) + # if not build_topo(self): + # log.error("Cycle detected, stopping backward pass.") + # return # Stop the backward pass due to cycle detection + # # backpropagation + + # self.gradients = set() + # for node in reversed(topo): + # if not node.requires_opt: + # log.debug(f"Skipping {node.name} as it does not require optimization") + # continue + # node.gradients = _check_and_reduce_gradients(node) + # log.debug(f"v: {node.data}, grad_fn: {node.grad_fn}, {node.get_grad_fn()}") + # if node.get_grad_fn() is not None: # gradient function takes in the engine + # log.debug(f"Calling gradient function for {node.name}") + # node.grad_fn() + + def draw_interactive_html_graph( + self, + filepath: Optional[str] = None, + nodes: List["Parameter"] = None, + edges: List[Tuple["Parameter", "Parameter"]] = None, + ) -> Dict[str, Any]: + """ + Generate an interactive graph with pyvis and save as an HTML file. + + Args: + nodes (list): A list of Parameter objects. + edges (list): A list of edges as tuples (source, target). + filepath (str, optional): Path to save the graph file. Defaults to None. + + Returns: + dict: A dictionary containing the graph file path. + """ + from jinja2 import Template + + # Define the output file path + output_file = "interactive_graph.html" + final_file = filepath + "_" + output_file if filepath else output_file + + # Create a pyvis Network instance + net = Network(height="750px", width="100%", directed=True) + + # Add nodes to the graph + node_ids = set() + for node in nodes: + label = ( + f"Name: {node.name}
" + f"Role: {node.role_desc.capitalize()}
" + f"Value: {node.data}
" + f"Data ID: {node.data_id}
" + ) + if node.proposing: + label += "Proposing: Yes
" + label += f"Previous Value: {node.previous_data}
" + if node.requires_opt: + label += "Requires Optimization: Yes
" + if node.param_type: + label += f"Type: {node.param_type}
" + if node.gradients: + label += f"Gradients: {node.get_gradients_names()}
" + + net.add_node( + node.id, + label=node.name, + title=label, + color="lightblue" if node.proposing else "orange", + ) + node_ids.add(node.id) + + # Add edges to the graph + for source, target in edges: + if source.id in node_ids and target.id in node_ids: + net.add_edge(source.id, target.id) + else: + print( + f"Skipping edge from {source.name} to {target.name} as one of the nodes does not exist." + ) + + # Enable physics for better layout + net.toggle_physics(True) + net.template = Template( + """ + + + + + + + +
+ + + + """ + ) + + # Save the graph as an HTML file + + net.show(final_file) + print(f"Interactive graph saved to {final_file}") + + return {"graph_path": final_file} + def draw_graph( self, add_grads: bool = True, + full_trace: bool = False, format: Literal["png", "svg"] = "png", rankdir: Literal["LR", "TB"] = "TB", filepath: Optional[str] = None, - ): + ) -> Dict[str, Any]: """Draw the graph of the parameter and its gradients. Args: @@ -461,10 +726,10 @@ def draw_graph( format (str, optional): The format of the output file. Defaults to "png". rankdir (str, optional): The direction of the graph. Defaults to "TB". filepath (str, optional): The path to save the graph. Defaults to None. + full_trace (bool, optional): Whether to include more detailed trace such as api_kwargs. Defaults to False. """ from adalflow.utils import save_json from adalflow.utils.global_config import get_adalflow_default_root_path - import os try: from graphviz import Digraph @@ -538,6 +803,8 @@ def wrap_and_escape(text, width=40): f"Role: {wrap_and_escape(n.role_desc.capitalize())}" f"Value: {wrap_and_escape(n.data)}" ) + if n.data_id is not None: + node_label += f"Data ID: {wrap_and_escape(n.data_id)}" if n.proposing: node_label += f"Proposing{{'Yes'}}" node_label += f"Previous Value: {wrap_and_escape(n.previous_data)}" @@ -545,6 +812,12 @@ def wrap_and_escape(text, width=40): node_label += f"Requires Optimization: {{'Yes'}}" if n.param_type: node_label += f"Type: {wrap_and_escape(n.param_type.name)}" + if full_trace and n.component_trace.api_kwargs is not None: + node_label += f" API kwargs: {wrap_and_escape(str(n.component_trace.api_kwargs))}" + + # show the score for intermediate nodes + if n._score is not None and len(n.predecessors) > 0: + node_label += f"Score: {str(n._score)}" if add_grads: node_label += f"Gradients: {wrap_and_escape(n.get_gradients_names())}" # add a list of each gradient with short value @@ -562,6 +835,8 @@ def wrap_and_escape(text, width=40): if len(n._traces.values()) > 0: node_label += f"Traces: keys: {wrap_and_escape(str(n._traces.keys()))}" node_label += f"Traces: values: {wrap_and_escape(str(n._traces.values()))}" + if n.tgd_optimizer_trace is not None: + node_label += f"TGD Optimizer Trace: {wrap_and_escape(str(n.tgd_optimizer_trace))}" node_label += "" # check if the name exists in dot @@ -591,6 +866,7 @@ def wrap_and_escape(text, width=40): # raise ImportError( # "Please install matplotlib using 'pip install matplotlib' to use this feature" # ) from e + # ) from e # from io import BytesIO # import numpy as np @@ -623,11 +899,17 @@ def wrap_and_escape(text, width=40): # save_json(prompts, filename) # save root node to_dict to json save_json(self.to_dict(), f"{filepath}_root.json") - return dot + + # draw interactive graph + self.draw_interactive_html_graph( + filepath=filepath, nodes=[n for n in nodes], edges=edges + ) + return {"graph_path": filepath, "root_path": f"{filepath}_root.json"} def to_dict(self): return { "name": self.name, + "id": self.id, "role_desc": self.role_desc, "data": str(self.data), "requires_opt": self.requires_opt, @@ -682,20 +964,3 @@ def from_dict(cls, data: dict): def __repr__(self): return f"Parameter(name={self.name}, requires_opt={self.requires_opt}, param_type={self.param_type}, role_desc={self.role_desc}, data={self.data}, predecessors={self.predecessors}, gradients={self.gradients},\ raw_response={self.raw_response}, input_args={self.input_args}, traces={self._traces})" - - -def _check_and_reduce_gradients(variable: Parameter) -> Set[Parameter]: - - 
if variable.get_gradient_and_context_text() == "": - log.debug(f"No gradients detected for {variable.data}") - return variable.gradients - if len(variable.gradients) == 1: - log.debug(f"Only one gradient, no need to reduce: {variable.gradients}") - return variable.gradients - else: - log.debug( - f"Multiple gradients detected for {variable.data}. But we are not reducting them." - ) - return variable.gradients - - # TODO: Implement the reduction logic later diff --git a/adalflow/adalflow/optim/text_grad/backend_engine_prompt.py b/adalflow/adalflow/optim/text_grad/backend_engine_prompt.py index e3b60862..a5f3ddb1 100644 --- a/adalflow/adalflow/optim/text_grad/backend_engine_prompt.py +++ b/adalflow/adalflow/optim/text_grad/backend_engine_prompt.py @@ -11,31 +11,96 @@ FEEDBACK_ENGINE_TEMPLATE = r""" You are the feedback engine in an optimization system. -Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags. +Your task is to provide intelligent and creative feedback for the target variable enclosed in tags, +so that the optimizer can optimize this variable to improve the objective enclosed in tags. + 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable. 2. Feedback examples: "Since language models have the X failure mode...", "Adding X can fix this error because...", "Removing X can improve the objective function because...", "Changing X to Y would fix the mistake..." 3. Consider the variable in the context of its peers if provided. + Remember: -Be concise, critical, and direct. +Be specific, concise, critical, and direct. {{conversation_sec}} {{objective_instruction_sec}} """ +############################################## +# Loss Component +############################################## + + +# Objective instruction for LLM as gradComponent with user custom instruction + +# OBJECTIVE_INSTRUCTION_BASE = r""" +# Our only goal is to improve the above metric, and nothing else. +# {% if instruction_to_backward_engine %} +# Note: {{instruction_to_backward_engine}} +# {% endif %} +# """ + +OBJECTIVE_INSTRUCTION_BASE = r""" +Your only goal is to clearly states how it obtained the "". +Especially when the score is low. +Be CONCISE. +Be specific on why it has a low score. +e.g. "The retrieved context is not enough to answer the question so the problem relies on the retrieval part." +""" + + +### Variable to get feedback on, often it is pred in the loss component +LOSS_CONVERSATION_START_INSTRUCTION_STRING_FN = r""" +TARGET VARIABLE: + {{variable_name}} + {{variable_desc}} + {{variable_value}} +{{conversation_str}} +""" + +### Loss/Score Information ### +LOSS_CONVERSATION_TEMPLATE_STRING = r""" +The variable is passed to the eval function and compared with a target/ground truth value. + +: {{eval_fn_desc}} +: {{input_str}} +: {{response_value}} +{% if metadata %} +Note: {{metadata}} +{% endif %}""" + + +############################################## +# LLM as gradComponent +############################################## +# When the parameter has a gradient, it is the continuation of the backpropagation chain, a layer in the models +CONVERSATION_START_INSTRUCTION_CHAIN = r""" +{{variable_and_peers_info}} + +Here is a conversation with the language model (LM): +{{conversation_str}} +""" + +OBJECTIVE_INSTRUCTION_CHAIN = r""" +This conversation is part of a larger system. The was later used as {{response_desc}}. 
+ +Your goal is to give feedback to the variable to guide the LLM_OUTPUT according to feedback: {{response_gradient}} +{% if instruction_to_backward_engine %} +Note: {{instruction_to_backward_engine}} +{% endif %} +""" ### Backward engine: user prompt # First part to provide context of LLM as gradComponent +# The target variable is used as either input or a task instruction to a language model (LM): +# replace the "The target variable is used as either input or a task instruction to a language model (LM):" with the {{variable_desc}} +# NAME: {{variable_name}} +# Description: {{variable_desc}} LLM_CONVERSATION_TEMPLATE = r""" -NAME: {{variable_name}} -The target variable is used as either input or a task instruction to a language model (LM): - LM_INPUT: {{input_value}} LM_OUTPUT: {{llm_output}}""" -# only passing variable (dict) and peers as parameters -# shared between the VARIABLE_AND_PEERS_INFO = r""" {{variable.name}} @@ -62,37 +127,11 @@ {% endif %} """ -# When the parameter has no gradient, it is the start of the backpropagation chain, used as a loss function -CONVERSATION_START_INSTRUCTION_BASE = r""" -{{variable_and_peers_info}} - -Here is an evaluation of the variable using a language model: -{{conversation_str}} -""" -# When the parameter has a gradient, it is the continuation of the backpropagation chain, a layer in the models -CONVERSATION_START_INSTRUCTION_CHAIN = r""" -{{variable_and_peers_info}} - -Here is a conversation with a language model (LM): -{{conversation_str}} -""" - -# Objective instruction for LLM as gradComponent with user custom instruction +# # When the parameter has no gradient, it is the start of the backpropagation chain, used as a loss function +# CONVERSATION_START_INSTRUCTION_BASE = r""" +# {{variable_and_peers_info}} -OBJECTIVE_INSTRUCTION_BASE = r""" -Our only goal is to improve the above metric, and nothing else. -{% if instruction_to_backward_engine %} -Note: {{instruction_to_backward_engine}} -{% endif %} -""" - - -OBJECTIVE_INSTRUCTION_CHAIN = r""" -This conversation is part of a larger system. The was later used as {{response_desc}}. - -Your goal is to give feedback to the variable with the LLM_OUTPUT: {{response_gradient}} -{% if instruction_to_backward_engine %} -Note: {{instruction_to_backward_engine}} -{% endif %} -""" +# Here is an evaluation of the variable using a language model: +# {{conversation_str}} +# """ diff --git a/adalflow/adalflow/optim/text_grad/llm_text_loss.py b/adalflow/adalflow/optim/text_grad/llm_text_loss.py index 46cf7a0f..d34373e1 100644 --- a/adalflow/adalflow/optim/text_grad/llm_text_loss.py +++ b/adalflow/adalflow/optim/text_grad/llm_text_loss.py @@ -1,4 +1,6 @@ -"""Implementation of TextGrad: Automatic “Differentiation” via Text""" +"""Implementation of TextGrad: Automatic “Differentiation” via Text. +This code is not used as we treat the non-optimizable version of LLM judge as a form of eval_fn. +We use class EvalFnToTextLoss instead as of today 12/9/2024""" from typing import Union, TYPE_CHECKING diff --git a/adalflow/adalflow/optim/text_grad/ops.py b/adalflow/adalflow/optim/text_grad/ops.py index da2b438f..ddce60dc 100644 --- a/adalflow/adalflow/optim/text_grad/ops.py +++ b/adalflow/adalflow/optim/text_grad/ops.py @@ -76,8 +76,11 @@ def forward(self, params: List[Parameter]) -> Parameter: def backward(self, summation: Parameter): """ - Performs the backward pass of the sum operation. - This is simply an idempotent operation, where we make a gradient with the combined feedback and add it to the predecessors'grads. 
+ Computes gradients for the predecessors of the sum operation. + There is no gradient computation for the sum operation itself. + It is a simple way to combine multiple losses for convenience. + + sum.backward() => [loss1.backward(), loss2.backward(), ...] :param summation: The parameter representing the sum. :type summation: Parameter diff --git a/adalflow/adalflow/optim/text_grad/text_loss_with_eval_fn.py b/adalflow/adalflow/optim/text_grad/text_loss_with_eval_fn.py index c8654d4a..89ebd471 100644 --- a/adalflow/adalflow/optim/text_grad/text_loss_with_eval_fn.py +++ b/adalflow/adalflow/optim/text_grad/text_loss_with_eval_fn.py @@ -16,20 +16,25 @@ from adalflow.core.prompt_builder import Prompt from adalflow.eval.base import BaseEvaluator +from adalflow.optim.text_grad.backend_engine_prompt import ( + LOSS_CONVERSATION_TEMPLATE_STRING, + LOSS_CONVERSATION_START_INSTRUCTION_STRING_FN, + OBJECTIVE_INSTRUCTION_BASE, +) log = logging.getLogger(__name__) ### Loss/Score Information ### -CONVERSATION_TEMPLATE_STRING = r""" -The variable is passed to the eval function and compared with a target/ground truth value. +# LOSS_CONVERSATION_TEMPLATE_STRING = r""" +# The variable is passed to the eval function and compared with a target/ground truth value. -: {{eval_fn_desc}} -: {{input_str}} -: {{response_value}} -{% if metadata %} -Note: {{metadata}} -{% endif %}""" +# : {{eval_fn_desc}} +# : {{input_str}} +# : {{response_value}} +# {% if metadata %} +# Note: {{metadata}} +# {% endif %}""" # Does not have gradient on the output, the loss function of the backpropagation chain @@ -41,22 +46,22 @@ # Has the gradient on the output, the layer in the backpropagation chain # Conversation will be provided differently. -### Variable Information ### -CONVERSATION_START_INSTRUCTION_STRING_FN = r""" -TARGET VARIABLE: -: {{variable_name}} - {{variable_desc}} - {{variable_value}} -{{conversation_str}} -""" +# ### Variable Information ### +# CONVERSATION_START_INSTRUCTION_STRING_FN = r""" +# TARGET VARIABLE: +# {{variable_name}} +# {{variable_desc}} +# {{variable_value}} +# {{conversation_str}} +# """ # Third part of the user prompt -OBJECTIVE_INSTRUCTION_BASE = r""" -Your only goal is to clearly states how it obtained the "". -Especially when the score is low. -Be CONCISE. -If you have enough context, add a more specific feedback on how it failed. -""" +# OBJECTIVE_INSTRUCTION_BASE = r""" +# Your only goal is to clearly states how it obtained the "". +# Especially when the score is low. +# Be CONCISE. +# If you have enough context, add a more specific feedback on how it failed. +# """ OBJECTIVE_INSTRUCTION_CHAIN = r"""This conversation is part of a larger system. The was later used as "{{response_name}}: {{response_desc}}". @@ -206,7 +211,7 @@ def _backward_through_one_predecessor( response: Parameter, eval_fn_desc: str, backward_engine: "BackwardEngine", - is_chain: bool = False, + is_intermediate_node: bool = False, # if the node is an intermediate node in the backpropagation chain metadata: Dict[str, str] = None, ): if not pred.requires_opt: @@ -214,7 +219,9 @@ def _backward_through_one_predecessor( f"EvalFnToTextLoss: Skipping {pred} as it does not require optimization." 
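Relating to the `Sum` operation documented in the `ops.py` hunk above, a brief sketch of combining several loss Parameters so that a single `backward()` fans feedback out to each of them; the two loss variables are placeholders and the import path is assumed from this module:

```python
# Assumed import path for the functional wrapper around the Sum GradComponent.
from adalflow.optim.text_grad.ops import sum_ops

# correct_loss and failed_loss are placeholder loss Parameters
# (e.g. produced by EvalFnToTextLoss).
total_loss = sum_ops([correct_loss, failed_loss])
total_loss.backward()   # equivalent to calling backward() on each constituent loss
```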
) return - log.debug(f"EvalFnToTextLoss: Backward through {pred}, is_chain: {is_chain}") + log.debug( + f"EvalFnToTextLoss: Backward through {pred}, is_intermediate_node: {is_intermediate_node}" + ) if pred.check_if_already_computed_gradient_respect_to(response.id): log.info( @@ -237,7 +244,7 @@ def _backward_through_one_predecessor( # response information conversation_str = Prompt( - CONVERSATION_TEMPLATE_STRING, + LOSS_CONVERSATION_TEMPLATE_STRING, prompt_kwargs={ "input_str": inputs_string, "eval_fn_desc": eval_fn_desc, @@ -246,10 +253,10 @@ def _backward_through_one_predecessor( }, )() - conv_ins_template = CONVERSATION_START_INSTRUCTION_STRING_FN + conv_ins_template = LOSS_CONVERSATION_START_INSTRUCTION_STRING_FN obj_ins_template = OBJECTIVE_INSTRUCTION_BASE - if is_chain: + if is_intermediate_node: # conv_ins_template = CONVERSATION_START_INSTRUCTION_STRING_FN_CHAIN obj_ins_template = OBJECTIVE_INSTRUCTION_CHAIN @@ -315,6 +322,7 @@ def _backward_through_one_predecessor( ) # backward the end to end score + # TODO: not really useful pred.set_score(response.data) print(f"setting pred name {pred.name} score to {response.data}") @@ -335,11 +343,11 @@ def backward( """ log.info(f"EvalFnToTextLoss: Backward: {response}") children_params = response.predecessors - is_chain = True + is_intermediate_node = False response_gradient_context = response.get_gradient_and_context_text().strip() - if response_gradient_context == "": - log.info(f"EvalFnToTextLoss: Backward: No gradient found for {response}.") - is_chain = False + if response_gradient_context != "": + log.info("EvalFnToTextLoss is an intermediate node.") + is_intermediate_node = True log.info(f"response_gradient_context: {response_gradient_context}") # go through all child parameters @@ -364,17 +372,27 @@ def backward( response, eval_fn_desc, backward_engine, - is_chain, + is_intermediate_node, metadata, ) # backward for the score for the demo for pred in children_params: - if not pred.requires_opt: - log.debug( - f"EvalFnToTextLoss: Skipping {pred} as it does not require optimization." + # if not pred.requires_opt: + # log.debug( + # f"EvalFnToTextLoss: Skipping {pred} as it does not require optimization." + # ) + # continue + if not isinstance(response.data, float): + raise TypeError( + f"EvalFnToTextLoss: response.data must be a float. Got {type(response.data)}." 
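A note on the type check added above: the eval function wrapped by `EvalFnToTextLoss` must return a float score, which `backward()` then copies onto every predecessor as its per-sample `_score`. A hedged sketch with a made-up eval function and description:

```python
from adalflow.optim.text_grad.text_loss_with_eval_fn import EvalFnToTextLoss

def exact_match(y: str, y_gt: str) -> float:
    # Must return a float; backward() now raises TypeError otherwise.
    return 1.0 if y.strip() == y_gt.strip() else 0.0

loss_fn = EvalFnToTextLoss(
    eval_fn=exact_match,   # illustrative eval function
    eval_fn_desc="1.0 if the prediction matches the ground truth exactly, else 0.0",
)
```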
) - continue - pred._score = float(response.data) + pred._score = response.data + from adalflow.utils.logger import printc + + printc( + f"EvalFnToTextLoss: {pred.name} set_score: {response.data}, {response.name}", + "blue", + ) log.info(f"setting pred name {pred.name} score to {response.data}") diff --git a/adalflow/adalflow/optim/text_grad/tgd_optimizer.py b/adalflow/adalflow/optim/text_grad/tgd_optimizer.py index f2d5b918..219c299a 100644 --- a/adalflow/adalflow/optim/text_grad/tgd_optimizer.py +++ b/adalflow/adalflow/optim/text_grad/tgd_optimizer.py @@ -6,18 +6,19 @@ Source code: https://github.com/google-deepmind/opro """ -from typing import List, Dict, TYPE_CHECKING, Optional +from typing import List, Dict, TYPE_CHECKING, Optional, Any from collections import defaultdict import logging import re from dataclasses import field, dataclass - from adalflow.optim.optimizer import TextOptimizer, ParamsT from adalflow.optim.text_grad.backend_engine_prompt import VARIABLE_AND_PEERS_INFO from adalflow.optim.parameter import Parameter from adalflow.core.base_data_class import DataClass +from adalflow.tracing.decorators import trace_generator_states + if TYPE_CHECKING: from adalflow.core import ModelClient @@ -26,34 +27,6 @@ log = logging.getLogger(__name__) -# Tips: -# 1. Eliminate unnecessary words or phrases. -# 2. Add new elements to address specific feedback. -# 3. Be creative and present the variable differently. -OPTIMIZER_SYSTEM_PROMPT = r""" -You are part of an optimization system that refines existing variable values based on feedback. - -Your task: Propose a new variable value in response to the feedback. -1. Address the concerns raised in the feedback while preserving positive aspects. -2. Observe past performance patterns when provided and to keep the good quality. -3. Consider the variable in the context of its peers if provided. - FYI: - - If a peer will be optimized itself, do not overlap with its scope. - - Otherwise, you can overlap if it is necessary to address the feedback. - -Output: -Provide only the new variable value between {{new_variable_start_tag}} and {{new_variable_end_tag}} tags. - -Tips: -1. Eliminate unnecessary words or phrases. -2. Add new elements to address specific feedback. -3. Be creative and present the variable differently. -{% if instruction_to_optimizer %} -4. {{instruction_to_optimizer}} -{% endif %} -""" - - @dataclass class HistoryPrompt(DataClass): id: str @@ -61,16 +34,24 @@ class HistoryPrompt(DataClass): eval_score: float +#################################################################################################### +# Textual Gradient Descent Optimizer +#################################################################################################### +# {% if failed_proposals %} +# Here are the past failed proposals: +# {% for failed_proposal in failed_proposals %} +# {{loop.index}}. {{failed_proposal}} +# {% endfor %} +# {% endif %} TEXT_GRAD_DESC_TEMPLATE = r""" {{optimizer_system_prompt}} -{#Variable and feedback#} -{{variable_and_peers_info}} -{# ORPO past history #} + +{# OPRO past history #} {% if past_history %} -Here are the past iterations of this variable along with the validation score. +Here are the best past iterations of this variable along with the validation score. {% for history in past_history %} {{loop.index}}. 
{{history}} {% endfor %} @@ -100,9 +81,41 @@ class HistoryPrompt(DataClass): You must base on the following examples when modifying the {{variable_desc}}: {{in_context_examples}} {% endif %} +YOU MUST ENSURE the new variable shares the same intent as the original variable. +You can either rephrase the initial variable, or add more specific instructions based on the feedback. +You can not change the variable to only fit on one sample if the batch size is larger than 1. """ +# optimizer system prompt + +# Tips: +# 1. Eliminate unnecessary words or phrases. +# 2. Add new elements to address specific feedback. +# 3. Be creative and present the variable differently. +# Provide only the new variable value between {{new_variable_start_tag}} and {{new_variable_end_tag}} tags. +OPTIMIZER_SYSTEM_PROMPT = r""" +You are part of an optimization system that refines existing variable based on feedback generated on a batch of input data. + +1. Address the concerns raised in the feedback while preserving positive aspects. +3. Observe past performance patterns when provided and to keep the good quality. +4. Consider the variable in the context of its peers if provided. + FYI: + - If a peer will be optimized itself, do not overlap with its scope. + - Otherwise, you can overlap if it is necessary to address the feedback. + +{{output_format_str}} + + +Tips: +1. Eliminate unnecessary words or phrases. +2. Add new elements to address specific feedback. +3. Be creative and present the variable differently. +{% if instruction_to_optimizer %} +4. {{instruction_to_optimizer}} +{% endif %} +""" + @dataclass class Instruction(DataClass): @@ -119,6 +132,25 @@ class Instruction(DataClass): ) +@dataclass +class TGDData(DataClass): + reasoning: str = field(metadata={"desc": "Why the variable is proposed this way"}) + proposed_variable: str = field(metadata={"desc": "The proposed variable"}) + + +@dataclass +class TGDOptimizerTrace: + api_kwargs: Dict[str, Any] = field( + metadata={ + "desc": "The api_kwargs for components like Generator and Retriever that pass to the model client" + }, + default=None, + ) + output: TGDData = field( + metadata={"desc": "The output of the TGD optimizer"}, default=None + ) + + new_variable_tags = ["", ""] @@ -134,6 +166,7 @@ def extract_new_variable(text: str) -> str: return matches[0].strip() +@trace_generator_states() class TGDOptimizer(TextOptimizer): __doc__ = """Textual Gradient Descent(LLM) optimizer for text-based variables.""" @@ -141,6 +174,7 @@ class TGDOptimizer(TextOptimizer): params: ParamsT constraints: List[str] params_history: Dict[str, List[HistoryPrompt]] = {} # id to history + # failed_proposals: Dict[str, List[HistoryPrompt]] = {} # only need the value def __init__( self, @@ -153,18 +187,25 @@ def __init__( in_context_examples: List[str] = None, # TODO: in-context examples num_gradient_memory: int = 0, # TODO: gradient memory and momentum, for now it is not useful max_past_history: int = 3, + # max_failed_proposals: int = 3, ): from adalflow.core.generator import Generator from adalflow.core import Prompt + from adalflow.components.output_parsers.dataclass_parser import DataClassParser super().__init__() self.params = params self.constraints = constraints or [] + self.data_class = TGDData + self.output_parser = DataClassParser( + data_class=self.data_class, return_data_class=True, format_type="json" + ) self.optimizer_system_prompt = Prompt( template=optimizer_system_prompt, prompt_kwargs={ - "new_variable_start_tag": new_variable_tags[0], - "new_variable_end_tag": 
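A sketch of the structured-output path introduced above: the optimizer now asks the LLM for a JSON object matching `TGDData` and parses it with `DataClassParser` instead of extracting the value between the new-variable tags via `extract_new_variable`. The raw response string below is made up and the parser call pattern is assumed:

```python
from adalflow.components.output_parsers.dataclass_parser import DataClassParser
from adalflow.optim.text_grad.tgd_optimizer import TGDData

parser = DataClassParser(data_class=TGDData, return_data_class=True, format_type="json")
print(parser.get_output_format_str())   # injected as {{output_format_str}} in the optimizer system prompt

# Made-up raw LLM response in the requested JSON format:
raw = '{"reasoning": "tighten the task instruction", "proposed_variable": "Answer in one word."}'
tgd_data = parser(raw)                   # assumed call pattern; returns a TGDData instance
print(tgd_data.proposed_variable)
```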
new_variable_tags[1], + # "new_variable_start_tag": new_variable_tags[0], + # "new_variable_end_tag": new_variable_tags[1], + "output_format_str": self.output_parser.get_output_format_str(), }, ) self.variable_and_peers_info = Prompt( @@ -177,17 +218,21 @@ def __init__( self.num_gradient_memory = num_gradient_memory self.gradient_memory_dict = defaultdict(list) # id to num_gradient_memory self.do_gradient_memory = self.num_gradient_memory > 0 + self.llm_optimizer = Generator( model_client=model_client, model_kwargs=model_kwargs, template=TEXT_GRAD_DESC_TEMPLATE, + output_processors=self.output_parser, ) self.max_past_history = max_past_history + # self.max_failed_proposals = max_failed_proposals # initate the past history for each parameter for param in self.params: self.params_history[param.id] = [] + # self.failed_proposals[param.id] = [] @property def constraint_text(self): @@ -242,6 +287,40 @@ def render_history(self, param_id: str) -> List[str]: history.to_yaml(exclude=["id"]) for history in self.params_history[param_id] ] + # def add_failed_proposal(self): + # """Save a copy of the current value of the parameter in the failed proposals.""" + # for param in self.params: + # failed_proposal = HistoryPrompt( + # id=param.id, + # value=param.data, + # eval_score=None, + # ) + # self.failed_proposals[param.id].append(failed_proposal) + # if len(self.failed_proposals[param.id]) > self.max_failed_proposals: + # for _ in range( + # len(self.failed_proposals[param.id]) - self.max_failed_proposals + # ): + # self.failed_proposals[param.id].pop() + # # if param_id not in self.failed_proposals: + # # self.failed_proposals[param_id] = [] + # # failed_proposal = HistoryPrompt( + # # id=param_id, + # # value=value, + # # eval_score=None, + # # ) + # # self.failed_proposals[param_id].append(failed_proposal) + # # if len(self.failed_proposals[param_id]) > self.max_failed_proposals: + # # for _ in range(len(self.failed_proposals[param_id]) - self.max_failed_proposals): + # # self.failed_proposals[param_id].pop() + + # def render_failed_proposals(self, param_id: str) -> List[str]: + # if param_id not in self.failed_proposals: + # return [] + # return [ + # history.to_yaml(exclude=["id", "eval_score"]) + # for history in self.failed_proposals[param_id] + # ] + # TODO: optimize with adalflow template for better readability def get_gradient_memory_text(self, param: Parameter) -> str: grad_memory = "" @@ -260,7 +339,9 @@ def _get_user_prompt_kwargs(self, param: Parameter) -> Dict[str, str]: user_prompt_kwargs = { "variable_and_peers_info": variable_and_peer_info, - "variable_grad": param.get_gradient_and_context_text(), + "variable_grad": param.get_gradient_and_context_text( + skip_correct_sample=True + ), # constraints "constraint_text": self.constraint_text if self.do_constrained else None, # in-context examples @@ -279,6 +360,12 @@ def _get_user_prompt_kwargs(self, param: Parameter) -> Dict[str, str]: "past_history": ( self.render_history(param.id) if self.max_past_history else None ), + # failed proposals + # "failed_proposals": ( + # self.render_failed_proposals(param.id) + # if self.max_failed_proposals + # else None + # ), } return user_prompt_kwargs @@ -286,7 +373,7 @@ def _get_user_prompt_kwargs(self, param: Parameter) -> Dict[str, str]: # TODO: better way to update the gradient memory def update_gradient_memory(self, param: Parameter): self.gradient_memory_dict[param.id].append( - {"value": param.get_gradient_and_context_text()} + {"value": 
param.get_gradient_and_context_text(skip_correct_sample=True)} ) def zero_grad(self): @@ -299,6 +386,8 @@ def propose(self): if self.proposing: raise ValueError("Already proposing a value.") + print("Proposing a new value.") + # no cache so that new proposal can be made no_cache = True # print("Proposing a new value.") @@ -327,12 +416,22 @@ def propose(self): ) prompt_str = self.llm_optimizer.get_prompt(**prompt_kwargs) log.debug(f"TGD LLM optimizer prompt: {prompt_str}") - proposed_data = response.data + proposed_data: TGDData = ( + response.data + if response.data + else TGDData( + reasoning="No reasoning", proposed_variable=response.raw_response + ) + ) log.info(f"Response from the optimizer: {response}") # extract the improved variable from the response # TODO: make it more robust - improved_variable = extract_new_variable(proposed_data) + # improved_variable = extract_new_variable(proposed_data) + improved_variable = proposed_data.proposed_variable param.propose_data(improved_variable) + param.trace_optimizer(api_kwargs=prompt_str, response=response) + print(f"prompt_str: {prompt_str}") + print(f"response: {response}") if self.do_gradient_memory: self.update_gradient_memory(param) self.proposing = True @@ -345,6 +444,7 @@ def revert(self): if not param.requires_opt: continue param.revert_data() + param.trace_optimizer(api_kwargs=None, response=None) self.proposing = False def step(self): diff --git a/adalflow/adalflow/optim/trainer/adal.py b/adalflow/adalflow/optim/trainer/adal.py index f9bcfc10..cea31760 100644 --- a/adalflow/adalflow/optim/trainer/adal.py +++ b/adalflow/adalflow/optim/trainer/adal.py @@ -249,13 +249,18 @@ def evaluate_samples( ) for future in concurrent.futures.as_completed(futures): - i = futures[future] - acc_list[i] = ( - future.result() - ) # Place the result in the correct position - progress_bar.update( - 1 - ) # Update progress bar after each result is collected + try: + i = futures[future] + acc_list[i] = ( + future.result() + ) # Place the result in the correct position + progress_bar.update( + 1 + ) # Update progress bar after each result is collected + except Exception as e: + + progress_bar.close() + raise ValueError(f"Exception in task {i}: {e}") avg_score = float(np.mean(np.array(acc_list))) return EvaluationResult(avg_score=avg_score, per_item_scores=acc_list) @@ -394,6 +399,11 @@ def train_step(self, batch, batch_idx, num_workers: int = 2) -> List: samples[i] = sample # Keep the sample order aligned # check the ordering + if isinstance(y_pred, Parameter): + raise ValueError(f"y_pred_{i} is a Parameter, {y_pred}") + + print(f"y_pred: {y_pred})") + assert ( y_pred.id == sample.id ), f"ID mismatch: {y_pred.id} != {sample.id}, type: {type(y_pred)}" @@ -469,14 +479,13 @@ def validation_step(self, batch, batch_idx, num_workers: int = 2) -> List: """ # TODO: let use decide which mode to be self.task.eval() + self.task.use_teacher(mode=False) # ensure the teacher is not used completed_y_preds, completed_samples, index_to_score = self.pred_step( batch, batch_idx, num_workers, running_eval=True, min_score=minimum_score ) if index_to_score: # compute score from index_to_score - print( - f"completed_samples: {len(completed_samples)}, len: {len(list(index_to_score.values()))}" - ) + avg_score = np.mean(list(index_to_score.values())).item() acc_list = [None] * len(index_to_score) for i, score in index_to_score.items(): @@ -598,7 +607,9 @@ def configure_backward_engine_helper( if self.loss_fn: self.loss_fn.set_backward_engine(self.backward_engine) - def 
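Tying together the `propose`/`revert` changes above, a hedged sketch of the outer loop the optimizer supports; the parameter list, model client, and `validate()` helper are placeholders:

```python
from adalflow.optim.text_grad.tgd_optimizer import TGDOptimizer

optimizer = TGDOptimizer(
    params=trainable_params,          # placeholder list of Parameters with requires_opt=True
    model_client=client,              # placeholder ModelClient
    model_kwargs={"model": "..."},
)

optimizer.propose()                   # LLM proposes TGDData.proposed_variable for each parameter
if validate(task) > last_val_score:   # placeholder validation step
    optimizer.step()                  # accept the proposed values
else:
    optimizer.revert()                # restore previous values and clear the optimizer trace
```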
configure_callbacks(self, save_dir: Optional[str] = "traces", *args, **kwargs): + def configure_callbacks( + self, save_dir: Optional[str] = "traces", *args, **kwargs + ) -> List[str]: """In default we config the failure generator callback. User can overwrite this method to add more callbacks.""" from adalflow.utils.global_config import get_adalflow_default_root_path import os @@ -606,7 +617,7 @@ def configure_callbacks(self, save_dir: Optional[str] = "traces", *args, **kwarg if not save_dir: save_dir = "traces" save_dir = os.path.join(get_adalflow_default_root_path(), save_dir) - print(f"Saving traces to {save_dir}") + log.debug(f"Saving traces to {save_dir}") return self._auto_generator_callbacks(save_dir) def run_one_task_sample(self, sample: Any) -> Any: @@ -640,9 +651,10 @@ def _find_all_generators(self) -> List[Tuple[str, "Generator"]]: for name, comp in self.task.named_components(): if isinstance(comp, Generator): all_generators.append((name, comp)) + log.debug(f"all_generators: {all_generators}") return all_generators - def _auto_generator_callbacks(self, save_dir: str = "traces"): + def _auto_generator_callbacks(self, save_dir: str = "traces") -> List[str]: r"""Automatically generate callbacks.""" from adalflow.core.types import GeneratorOutput from adalflow.tracing.generator_call_logger import ( @@ -652,7 +664,7 @@ def _auto_generator_callbacks(self, save_dir: str = "traces"): all_generators = self._find_all_generators() - print(f"all_generators: {all_generators}") + log.debug(f"all_generators: {all_generators}") def _on_completion_callback( output: GeneratorOutput, @@ -672,9 +684,10 @@ def _on_completion_callback( # Register the callback for each generator file_paths = [] + call_logger = GeneratorCallLogger(save_dir=save_dir) for name, generator in all_generators: - call_logger = GeneratorCallLogger(save_dir=save_dir) - call_logger.reset() + + # call_logger.reset() call_logger.register_generator(name) logger_call = partial(call_logger.log_call, name) generator.register_callback( @@ -682,10 +695,7 @@ def _on_completion_callback( ) file_path = call_logger.get_log_location(name) file_paths.append(file_path) - print( - f"Registered callback for {name}, file path: {file_path}", - end="\n", - ) + log.debug(f"Registered callback for {name}, file path: {file_path}") return file_paths def configure_demo_optimizer_helper(self) -> List[DemoOptimizer]: diff --git a/adalflow/adalflow/optim/trainer/trainer.py b/adalflow/adalflow/optim/trainer/trainer.py index ae17b064..91a2fd16 100644 --- a/adalflow/adalflow/optim/trainer/trainer.py +++ b/adalflow/adalflow/optim/trainer/trainer.py @@ -27,11 +27,11 @@ from adalflow.utils import save_json, load_json from adalflow.utils.cache import hash_text_sha1 from adalflow.utils.data import DataLoader - +from adalflow.utils.logger import printc from adalflow.optim.types import TrainerValidateStats -log = logging.getLogger(__name__) +logger = logging.getLogger(__name__) class Trainer(Component): @@ -91,9 +91,10 @@ class Trainer(Component): batch_val_score_threshold: Optional[float] = ( 1.0 # when acc_score >= this threshold, skip this batch ) - max_error_samples: Optional[int] = 8 - max_correct_samples: Optional[int] = 8 + max_error_samples: Optional[int] = 2 + max_correct_samples: Optional[int] = 2 debug: bool = False + sequential_order: List[str] = ["text", "demo"] def __init__( self, @@ -105,8 +106,8 @@ def __init__( num_workers: int = 4, ckpt_path: str = None, batch_val_score_threshold: Optional[float] = 1.0, - max_error_samples: Optional[int] = 4, - 
max_correct_samples: Optional[int] = 4, + max_error_samples: Optional[int] = 2, + max_correct_samples: Optional[int] = 2, max_proposals_per_step: int = 5, train_loader: Optional[Any] = None, train_dataset: Optional[Any] = None, @@ -119,6 +120,7 @@ def __init__( exclude_input_fields_from_bootstrap_demos: bool = False, debug: bool = False, save_traces: bool = False, # save traces in the few-shto demos + sequential_order: List[str] = ["text", "demo"], *args, **kwargs, ) -> None: @@ -161,6 +163,7 @@ def __init__( self.exclude_input_fields_from_bootstrap_demos = ( exclude_input_fields_from_bootstrap_demos ) + self.sequential_order = sequential_order # TODO: need to support checkpoint resume too! def diagnose(self, dataset: Any, split: str = "train"): @@ -188,7 +191,8 @@ def diagnose(self, dataset: Any, split: str = "train"): trainer_state = self.gather_trainer_states() self.prep_ckpt_file_path(trainer_state) save_path = os.path.join(self.ckpt_path, f"diagnose_{split}") - print(f"Save diagnose to {save_path}") + logger.debug(f"Save diagnose to {save_path}") + # One generator will be one file, all stats are in logger_metadata.json log_paths = self.adaltask.configure_callbacks(save_dir=save_path) # 2. evaluate acc = self.adaltask.validation_step(dataset, 0, self.num_workers) @@ -206,15 +210,18 @@ def diagnose(self, dataset: Any, split: str = "train"): raise ValueError( "dataset should have an attribute id for tracking the samples" ) - print(f"sorted_indices: {sorted_indices}") + logger.debug(f"sorted_indices: {sorted_indices}") + sorted_scores = [acc_per_item_scores[i] for i in sorted_indices] - print(f"sorted_scores: {sorted_scores}") + logger.debug(f"sorted_scores: {sorted_scores}") sorted_dataset = [dataset[i] for i in sorted_indices] + paths: Dict[str, List[str]] = {"Log": log_paths, "Diagnose": [], "Stats": []} + # reorder the samples based on the score for log_path in log_paths: file_name = os.path.basename(log_path) - print(f"Loading log file: {file_name}") + logger.debug(f"Loading log file: {file_name}") logs = load_jsonl(log_path) try: logs_dict = {log["output"]["id"]: log for log in logs} @@ -232,6 +239,7 @@ def diagnose(self, dataset: Any, split: str = "train"): diagnose_file = os.path.join(log_dir, diagnose_filename) diagnose_items = [] + stats_list: List[Dict] = [] for i, log in enumerate(sorted_logs): if log["score"] < 0.5: diagnose_item = { @@ -252,16 +260,68 @@ def diagnose(self, dataset: Any, split: str = "train"): "total_error_samples": len(diagnose_items), "avg_score": acc_score, } - save_json(stats, os.path.join(log_dir, "stats.json")) - print(f"Total error samples: {len(diagnose_items)}") - print(f"Saved diagnose to {diagnose_file}") + stat_path = os.path.join(log_dir, "stats.json") + save_json(stats, stat_path) + logger.debug(f"Total error samples: {len(diagnose_items)}") + logger.debug(f"Saved diagnose to {diagnose_file}") + paths["Diagnose"].append(diagnose_file) + paths["Stats"].append(stat_path) + stats_list.append(stats) + + self.diagnose_report( + split=split, + acc_score=acc_score, + stats_list=stats_list, + log_paths=paths, + ) - return acc_score, acc_per_item_scores, log_paths + def diagnose_report( + self, + split: str, + acc_score: Optional[float] = None, + stats_list: Optional[List[Dict]] = None, + log_paths: Optional[Dict[str, List[str]]] = None, + ): + import colorama + from colorama import Fore + + # Initialize colorama + colorama.init(autoreset=True) + print(Fore.CYAN + "\n================== DIAGNOSE REPORT ==================\n") + + print(Fore.GREEN + f"✔ 
Split: {split}") + + # Check the accuracy score + if acc_score is not None: + print(Fore.GREEN + f"✔ Overall accuracy score: {acc_score:.2f}") + else: + print(Fore.RED + "✘ Accuracy score not provided or calculated.") + + # List the overall stats + if stats_list is not None and len(stats_list) > 0: + print(Fore.GREEN + "✔ Overall stats:") + for idx, item in enumerate(stats_list): + print(Fore.YELLOW + f" - {idx + 1}: {item}") + + # Check for log paths + if log_paths is not None: + for key, paths in log_paths.items(): + if len(paths) > 0: + print(Fore.GREEN + f"✔ {key} paths:") + for idx, path in enumerate(paths): + print(Fore.YELLOW + f" - {key} {idx + 1}: {path}") + + else: + print(Fore.RED + "✘ No log paths available.") + + # General summary + print(Fore.GREEN + "\n✔ Diagnose report completed successfully!") + print(Fore.CYAN + "\n=====================================================\n") def debug_report( self, - text_grad_debug_path: Optional[str] = None, - few_shot_demo_debug_path: Optional[str] = None, + text_grad_debug_path: Optional[Dict[str, object]] = None, + few_shot_demo_debug_path: Optional[Dict[str, object]] = None, ): import colorama from colorama import Fore @@ -273,7 +333,7 @@ def debug_report( if text_grad_debug_path: print(Fore.GREEN + f"✔ Text grad debug path: {text_grad_debug_path}") else: - print(Fore.RED + "✘ Text grad debugging was not run.") + print(Fore.CYAN + "✘ Text grad debugging was not run.") if few_shot_demo_debug_path: print( @@ -304,9 +364,12 @@ def fit( resume_from_ckpt: Optional[ str ] = None, # TODO: have a more comprehensive ckpt loading in the future - ): + ) -> Tuple[str, TrainerResult]: r""" train_loader: An iterable or collection of iterables specifying training samples. + + Returns: + Tuple[str, TrainerResult]: Checkpoint file and the TrainerResult object """ start_time = time.time() @@ -434,7 +497,7 @@ def fit( train_loader, train_dataset, val_dataset, test_dataset ) self.debug_report(text_grad_debug_path, few_shot_demo_debug_path) - return + return self.ckpt_file, trainer_results ########Run text_optimizers and demo optimizers in sequential order ######## if ( @@ -443,7 +506,6 @@ def fit( and len(self.text_optimizers) > 0 ): if self.strategy == "random": - self._fit_text_grad_demo_mix_random( train_loader, train_dataset, @@ -465,41 +527,67 @@ def fit( raise ValueError(f"Strategy {self.strategy} not supported") else: # sequential, text first and demo second - if len(self.text_optimizers) > 0: - if self.strategy == "random": - trainer_results = self._fit_text_grad_random( - train_loader, - val_dataset, - test_dataset, - trainer_results, - starting_step=starting_step, - ) - starting_step += self.max_steps - elif self.strategy == "constrained": - trainer_results = self._fit_text_grad_constraint( + + def run_text_optimizers(starting_step: int, trainer_results: TrainerResult): + if len(self.text_optimizers) > 0: + if self.strategy == "random": + trainer_results = self._fit_text_grad_random( + train_loader, + val_dataset, + test_dataset, + trainer_results, + starting_step=starting_step, + ) + starting_step += self.max_steps + elif self.strategy == "constrained": + trainer_results = self._fit_text_grad_constraint( + train_loader, + val_dataset, + test_dataset, + trainer_results=trainer_results, + starting_step=starting_step, + ) + starting_step += self.max_steps + else: + raise ValueError(f"Strategy {self.strategy} not supported") + + def run_demo_optimizers(starting_step: int, trainer_results: TrainerResult): + if len(self.demo_optimizers) > 0: + 
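A usage sketch for the improved diagnose flow reported above; the `AdalComponent` subclass and dataset are assumed to exist in user code:

```python
from adalflow.optim.trainer.trainer import Trainer

trainer = Trainer(adaltask=my_adal_component, num_workers=4)   # my_adal_component: user-defined AdalComponent
trainer.diagnose(dataset=train_dataset, split="train")
# Prints the colorized DIAGNOSE REPORT and, per generator, lists:
#   - the raw call log ("Log" paths)
#   - a diagnose JSON with the low-scoring samples ("Diagnose" paths)
#   - a stats.json with error counts and the average score ("Stats" paths)
```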
self.adaltask.configure_teacher_generator() + self._fit_demos_random( train_loader, + train_dataset, val_dataset, test_dataset, trainer_results=trainer_results, starting_step=starting_step, ) - starting_step += self.max_steps - else: - raise ValueError(f"Strategy {self.strategy} not supported") - if len(self.demo_optimizers) > 0: - self.adaltask.configure_teacher_generator() # attemp to use the newest teacher as - self._fit_demos_random( - train_loader, - train_dataset, - val_dataset, - test_dataset, - trainer_results=trainer_results, - starting_step=starting_step, - ) + + if self.sequential_order == ["text", "demo"]: + run_text_optimizers(starting_step, trainer_results) + run_demo_optimizers(starting_step, trainer_results) + else: + run_demo_optimizers(starting_step, trainer_results) + run_text_optimizers(starting_step, trainer_results) + # if len(self.text_optimizers) > 0: + # run_text_optimizers(starting_step, trainer_results) + + # if len(self.demo_optimizers) > 0: + # run_demo_optimizers(starting_step, trainer_results) + # self.adaltask.configure_teacher_generator() # attemp to use the newest teacher as + # self._fit_demos_random( + # train_loader, + # train_dataset, + # val_dataset, + # test_dataset, + # trainer_results=trainer_results, + # starting_step=starting_step, + # ) end_time = time.time() print(f"Training time: {end_time - start_time}s") print(f"ckpt_file: {self.ckpt_file}") + return self.ckpt_file, trainer_results @staticmethod def _estimate_num_epochs(train_loader: Any, max_steps: int): @@ -582,7 +670,7 @@ def prep_ckpt_file_path(self, trainer_state: Dict[str, Any] = None): self.ckpt_path = os.path.join( default_root_path, "ckpt", self.adaltask.__class__.__name__ ) - print(f"Checkpoint path: {self.ckpt_path}") + logger.debug(f"Checkpoint path: {self.ckpt_path}") os.makedirs(self.ckpt_path, exist_ok=True) # list all existing checkpoints with the same file name prefix hash_key = ( @@ -627,7 +715,9 @@ def _pre_fit(self, val_dataset: Any, test_dataset: Any) -> TrainerResult: def _fit_demos_one_step_for_debug( self, train_loader, train_dataset: Any, val_dataset: Any, test_dataset: Any - ) -> str: + ) -> Dict[str, object]: + """Trace both the teacher and the student demos with scores and for sampling. 
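A short sketch of the new `sequential_order` knob and the `fit()` return value introduced above; the datasets, the `AdalComponent`, and the constructor/fit keywords other than `sequential_order` are assumptions drawn from the surrounding code:

```python
trainer = Trainer(
    adaltask=my_adal_component,           # placeholder AdalComponent
    strategy="constrained",               # assumed constructor keyword
    sequential_order=["demo", "text"],    # default is ["text", "demo"]
)
ckpt_file, trainer_results = trainer.fit(
    train_dataset=train_dataset,          # assumed fit() keywords
    val_dataset=val_dataset,
    test_dataset=test_dataset,
)
print(ckpt_file)   # fit() now returns the checkpoint file and the TrainerResult
```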
+ For demos: we need to run both the teacher mode and the student mode.""" # get_logger(level="DEBUG") print("Fitting using Random Demo Optimizer") @@ -659,7 +749,7 @@ def _fit_demos_one_step_for_debug( f"Expected 2 traces, got {len(demo_params[0]._traces)}, traces: {demo_params[0]._traces}" ) - print(f"Teacher y_preds: {y_preds[0].to_dict()}") + # print(f"Teacher y_preds: {y_preds[0].to_dict()}") y_preds_outputs = [p.full_response for p in y_preds] @@ -676,7 +766,7 @@ def _fit_demos_one_step_for_debug( losses: List[Parameter] = self.adaltask.loss_step( batch, y_preds, 0, self.num_workers ) - print(f"Losses: {losses[0].to_dict()}") + # print(f"Losses: {losses[0].to_dict()}") self._demo_optimizers_add_scores( [sample.id for sample in batch], batch_per_item_scores, is_teacher=True ) @@ -688,17 +778,21 @@ def _fit_demos_one_step_for_debug( print(f"Graph saved to {graph_path}") - # check the score + # check the score of one param for key, val in demo_params[0]._traces.items(): - print(f"param: {key}, val: {val}") + print(f"param: {key}, {demo_params[0].name}, val: {val}") score = val.score if score is None: raise ValueError("Score is None") print(f"param: {key}, score: {score}") - print(f"Loss after backward: {losses[0].to_dict()}") + # print(f"Loss after backward: {losses[0].to_dict()}") # tracking the bootstrap so we wont repeat the same samples + # 2. run student mode + + demo_debug_result_path = None + for batch_idx, batch in enumerate(train_loader): print(f"Training step: {batch_idx}") if batch_idx > 0: @@ -717,43 +811,51 @@ def _fit_demos_one_step_for_debug( ) # for loss in losses_student: # loss.backward() + # Check the eval result y_preds_outputs = [p.full_response for p in y_preds_student] eval_result = self.adaltask.evaluate_samples(batch, y_preds_outputs) print(f"Eval result: {eval_result.avg_score}") - eval_score_per_item = eval_result.per_item_scores - - # bootstrap - batch_for_teacher = [] - losses_teacher = [] - - for i, (sample, item_score) in enumerate(zip(batch, eval_score_per_item)): - # use teacher - if sample.id in pred_teacher: - continue - # if item_score < 0.5: - batch_for_teacher.append(sample) - pred_teacher.add(sample.id) - # run teacher, use teachers's output instead of the initial output (bootstrap) - if len(batch_for_teacher) > 0: - print(f"Using teacher for {len(batch_for_teacher)} samples") - self.adaltask.use_teacher() - y_preds_teacher = self.adaltask.train_step( - batch_for_teacher, batch_idx, self.num_workers - ) - losses_teacher: List[Parameter] = self.adaltask.loss_step( # noqa F841 - batch_for_teacher, y_preds_teacher, batch_idx, self.num_workers - ) - self._demo_optimizers_add_scores( - [sample.id for sample in batch_for_teacher], - eval_score_per_item, - is_teacher=True, - ) + # eval_score_per_item = eval_result.per_item_scores + + # bootstrap a batch + # batch_for_teacher = [] + # losses_teacher = [] + + # for i, (sample, item_score) in enumerate(zip(batch, eval_score_per_item)): + + # # use teacher + # if sample.id in pred_teacher: + # continue + # # if item_score < 0.5: + # pred_teacher.add(sample.id) + # batch_for_teacher.append(sample) + # # run teacher, use teachers's output instead of the initial output (bootstrap) + # if len(batch_for_teacher) > 0: + # print(f"Using teacher for {len(batch_for_teacher)} samples") + # self.adaltask.use_teacher() + # y_preds_teacher = self.adaltask.train_step( + # batch_for_teacher, batch_idx, self.num_workers + # ) + # losses_teacher: List[Parameter] = self.adaltask.loss_step( # noqa F841 + # batch_for_teacher, 
y_preds_teacher, batch_idx, self.num_workers + # ) + # self._demo_optimizers_add_scores( + # [sample.id for sample in batch_for_teacher], + # eval_score_per_item, + # is_teacher=True, + # ) + + # loss_students backward + for loss in losses_student: + loss.backward() # propose self._demo_optimizers_propose() graph_path = os.path.join(debug_path, "student_graph") - losses_student[0].draw_graph(filepath=graph_path) + demo_debug_result_path = losses_student[0].draw_graph( + filepath=graph_path + ) # noqa F841 # test step self._demo_optimizers_step() @@ -765,11 +867,13 @@ def _fit_demos_one_step_for_debug( opt_params = [] for opt in self.demo_optimizers: opt_params.extend(opt.params) - print(f"Opt params: {opt_params}") + # print(f"Opt params: {opt_params}") for name, param in self.adaltask.named_parameters(): if param.param_type == ParameterType.DEMOS: - print(f"Demo param: {name}, value: {param.data}, param: {param}") + print( + f"Demo param: {name}, value: {param.data}, param: {param.name}" + ) if param.data is None: raise ValueError("Demo param data is None") @@ -782,10 +886,12 @@ def _fit_demos_one_step_for_debug( if len(param._demos) == 0: raise ValueError(f"No demos found, param: {param}") - return debug_path + return demo_debug_result_path - def _fit_text_grads_one_step_for_debug(self, train_loader: Any) -> str: - print("Debugging fitting one step with batch size 2 for text optimizer") + def _fit_text_grads_one_step_for_debug(self, train_loader: Any) -> Dict[str, str]: + printc( + "Debugging fitting one step with batch size 2 for text optimizer", "blue" + ) self.prep_ckpt_file_path() debug_path = os.path.join(self.ckpt_path, "debug_text_grads") @@ -796,10 +902,13 @@ def _fit_text_grads_one_step_for_debug(self, train_loader: Any) -> str: self.adaltask.train() # this will turn everything to train mode correct_loss = None failed_loss = None - print("Finding one successful and one failed loss") + all_losses = [] + printc("Finding one successful and one failed loss", "blue") for batch in train_loader: y_preds = self.adaltask.train_step(batch, 0, self.num_workers) losses = self.adaltask.loss_step(batch, y_preds, 0, self.num_workers) + # Collect all losses + all_losses.extend(losses) for loss in losses: if loss.data > 0.5: correct_loss = loss @@ -808,13 +917,27 @@ def _fit_text_grads_one_step_for_debug(self, train_loader: Any) -> str: if correct_loss is not None and failed_loss is not None: print("Found correct and failed loss") break + + # Handle case where one or both losses are None + if correct_loss is None or failed_loss is None: + if not all_losses: + raise ValueError("No losses found in the dataset.") + + # Sort all_losses by their data values + all_losses.sort(key=lambda x: x.data, reverse=True) # Highest to lowest + + # Assign first and last loss in sorted list + correct_loss = all_losses[0] + failed_loss = all_losses[-1] + print("Assigned correct_loss and failed_loss from sorted losses.") + total_loss = sum_ops([correct_loss, failed_loss]) total_loss.backward() # test optimizer self._propose_text_optimizers() - total_loss.draw_graph(filepath=debug_path) - return debug_path + debug_files = total_loss.draw_graph(filepath=debug_path, full_trace=True) + return debug_files def _set_demo_optimizers_dataset(self, train_dataset: Any): # init the dataset @@ -829,6 +952,7 @@ def _demo_optimizers_add_scores( self, ids: List[str], scores: List[float], is_teacher: bool = True ): for opt in self.demo_optimizers: + # opt = cast(DemoOptimizer, opt) opt.add_scores(ids, scores, is_teacher) def 
_demo_optimizers_revert(self): @@ -858,6 +982,10 @@ def _propose_text_optimizers(self): for text_optimizer in self.text_optimizers: text_optimizer.propose() + # def _add_failed_proposals_text_optimizers(self): + # for opt in self.text_optimizers: + # opt.add_failed_proposal() + def _get_trainable_text_params(self): params = [] for opt in self.text_optimizers: @@ -899,7 +1027,7 @@ def _fit_text_grad_demo_mix_constrained( ): from adalflow.optim.parameter import Parameter - log.info("Fitting using Textual Gradient Descent") + logger.info("Fitting using Textual Gradient Descent") trainer_results = ( self._pre_fit(val_dataset, test_dataset) if trainer_results is None @@ -935,7 +1063,7 @@ def _fit_text_grad_demo_mix_constrained( ) # moving batch all_samples.extend(batch) - all_losses.extend(losses) + all_losses.extend(losses) # student losses # extract the non-parameter y_preds all_y_preds.extend( [y.full_response for y in y_preds if isinstance(y, Parameter)] @@ -993,6 +1121,7 @@ def _fit_text_grad_demo_mix_constrained( print( "No proposal can improve the subset and full set, go to next step" ) + # self._add_failed_proposals_text_optimizers() self._add_one_step_in_trainer_results( trainer_results, @@ -1001,6 +1130,7 @@ def _fit_text_grad_demo_mix_constrained( trainer_results.prompts[-1], total_steps, ) + continue # set the batch size to the size of the validation set @@ -1065,7 +1195,7 @@ def _fit_text_grad_demo_mix_random( train_results: TrainerResult = None, starting_step: int = 0, ): - log.info("Fitting using Textual Gradient Descent") + logger.info("Fitting using Textual Gradient Descent") trainer_results = ( self._pre_fit(val_dataset, test_dataset) @@ -1207,7 +1337,7 @@ def _fit_demos_random( trainer_results: TrainerResult, starting_step: int, ): - log.info("Fitting using Random Demo Optimizer") + logger.info("Fitting using Random Demo Optimizer") # self.adaltask.train() trainer_results = ( self._pre_fit(val_dataset, test_dataset) @@ -1250,7 +1380,7 @@ def _fit_demos_random( loss.backward_engine_disabled = ( True # temporary disable the backward engine ) - loss.backward() # TODO: ensure no gradients in the backward, disable backward engine + loss.backward() # TODO: ensure no gradients in the backward, disable backward engine, trace the score to each class instead # Trace the teacher run self.adaltask.use_teacher(True) self.adaltask.train() @@ -1397,7 +1527,7 @@ def _fit_text_grad_random( trainer_results: TrainerResult = None, starting_step: int = 0, ) -> TrainerResult: - log.info("Fitting using Textual Gradient Descent") + logger.info("Fitting using Textual Gradient Descent") trainer_results = ( self._pre_fit(val_dataset, test_dataset) if trainer_results is None @@ -1441,13 +1571,13 @@ def _fit_text_grad_random( minimum_score=last_val_score, ) val_score = val_output.avg_score - self._add_history_text_optimizers(val_score) if val_score > last_val_score: + print(f"Optimizer step: {val_score} > {last_val_score}") # self.optimizer.step() self._step_text_optimizers() - + self._add_history_text_optimizers(val_score) # track top performor # test the model test_output = self.adaltask.validation_step( test_dataset, total_steps, self.num_workers @@ -1461,6 +1591,9 @@ def _fit_text_grad_random( total_steps, ) else: + # if val_score < last_val_score: + # self._add_failed_proposals_text_optimizers() # track failed proposals + print(f"Optimizer revert: {val_score} <= {last_val_score}") # self.optimizer.revert() self._revert_text_optimizers() @@ -1606,6 +1739,9 @@ def _text_grad_constraint_propose_step( 
all_y_preds, include_demo_optimizers: bool = False, ): + """Handles both the mixed training and the separate training. + When include_demo_optimizers is True, the demo optimizers are included in the training + """ # comptute moving batch acc from adalflow.optim.parameter import Parameter @@ -1677,6 +1813,7 @@ def _text_grad_constraint_propose_step( print( f"Fail subset check, try next proposal: {val_score} <= {subset_score}" ) + # self._add_failed_proposals_text_optimizers() self._track_effectiveness("subset", False) self._revert_text_optimizers() if include_demo_optimizers: @@ -1696,6 +1833,7 @@ def _text_grad_constraint_propose_step( f"Fail full check, try next proposal: {new_move_batch_score} < {move_batch_score}" ) self._track_effectiveness("fullset", False) + # self._add_failed_proposals_text_optimizers() self._revert_text_optimizers() if include_demo_optimizers: self._demo_optimizers_revert() @@ -1741,7 +1879,7 @@ def _fit_text_grad_constraint( ) -> TrainerResult: from adalflow.optim.parameter import Parameter - log.info("Fitting using Textual Gradient Descent with constraints") + logger.info("Fitting using Textual Gradient Descent with constraints") trainer_results = ( self._pre_fit(val_dataset, test_dataset) if trainer_results is None @@ -1813,11 +1951,13 @@ def _fit_text_grad_constraint( minimum_score=last_val_score, ) val_score = val_output.avg_score - self._add_history_text_optimizers(val_score) if val_score > last_val_score: print(f"Optimizer step: {val_score} > {last_val_score}") # self.optimizer.step() + self._add_history_text_optimizers( + val_score + ) # track top performor self._step_text_optimizers() # save the score @@ -1849,6 +1989,7 @@ def _fit_text_grad_constraint( else: print(f"Optimizer revert: {val_score} <= {last_val_score}") self._revert_text_optimizers() + # self._add_failed_proposals_text_optimizers() # track failed proposals self._track_effectiveness("valset", False) self._add_one_step_in_trainer_results( trainer_results, diff --git a/adalflow/poetry.lock b/adalflow/poetry.lock index bac6b6cc..92d3c9cb 100644 --- a/adalflow/poetry.lock +++ b/adalflow/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. 
[[package]] name = "absl-py" @@ -192,6 +192,21 @@ doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21)"] trio = ["trio (>=0.26.1)"] +[[package]] +name = "asttokens" +version = "3.0.0" +description = "Annotate AST trees with source code positions" +optional = false +python-versions = ">=3.8" +files = [ + {file = "asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2"}, + {file = "asttokens-3.0.0.tar.gz", hash = "sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7"}, +] + +[package.extras] +astroid = ["astroid (>=2,<4)"] +test = ["astroid (>=2,<4)", "pytest", "pytest-cov", "pytest-xdist"] + [[package]] name = "async-timeout" version = "5.0.1" @@ -272,17 +287,17 @@ files = [ [[package]] name = "boto3" -version = "1.35.77" +version = "1.35.80" description = "The AWS SDK for Python" optional = true python-versions = ">=3.8" files = [ - {file = "boto3-1.35.77-py3-none-any.whl", hash = "sha256:a09871805f8e462349a1c33c23eb413668df0bf68424e61d53518e1a7d883b2f"}, - {file = "boto3-1.35.77.tar.gz", hash = "sha256:cc819cdbccbc2d0dc185f1dcfe74cf3809489c4cae63c2e5d6a557aa0c5ab928"}, + {file = "boto3-1.35.80-py3-none-any.whl", hash = "sha256:21a3b18c3a7fd20e463708fe3fa035983105dc7f3a1c274e1903e1583ab91159"}, + {file = "boto3-1.35.80.tar.gz", hash = "sha256:50dae461ab5fbedfb81b690895d48a918fed0d5fdff37be1c4232770c0dc9712"}, ] [package.dependencies] -botocore = ">=1.35.77,<1.36.0" +botocore = ">=1.35.80,<1.36.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -291,13 +306,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.35.77" +version = "1.35.80" description = "Low-level, data-driven core of boto 3." 
optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.35.77-py3-none-any.whl", hash = "sha256:3faa27d65841499762228902d7e215fa99a4c2fdc76c9113e1c3f339bdf685b8"}, - {file = "botocore-1.35.77.tar.gz", hash = "sha256:17b778016644e9342ca3ff2f430c1d1db0c6126e9b41a57cff52ac58e7a455e0"}, + {file = "botocore-1.35.80-py3-none-any.whl", hash = "sha256:36e589dccb62380abd628b08fecfa2f7c89b99f41ec9fc42c467c94008c0be4a"}, + {file = "botocore-1.35.80.tar.gz", hash = "sha256:b8dfceca58891cb2711bd6455ec4f7159051f3796e0f64adef9bb334f19d8a92"}, ] [package.dependencies] @@ -868,6 +883,20 @@ files = [ [package.extras] test = ["pytest (>=6)"] +[[package]] +name = "executing" +version = "2.1.0" +description = "Get the currently executing AST node of a frame, and other information" +optional = false +python-versions = ">=3.8" +files = [ + {file = "executing-2.1.0-py2.py3-none-any.whl", hash = "sha256:8d63781349375b5ebccc3142f4b30350c0cd9c79f921cde38be2be4637e98eaf"}, + {file = "executing-2.1.0.tar.gz", hash = "sha256:8ea27ddd260da8150fa5a708269c4a10e76161e2496ec3e587da9e3c0fe4b9ab"}, +] + +[package.extras] +tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich"] + [[package]] name = "faiss-cpu" version = "1.9.0.post1" @@ -971,61 +1000,61 @@ typing = ["typing-extensions (>=4.12.2)"] [[package]] name = "fonttools" -version = "4.55.2" +version = "4.55.3" description = "Tools to manipulate font files" optional = false python-versions = ">=3.8" files = [ - {file = "fonttools-4.55.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:bef0f8603834643b1a6419d57902f18e7d950ec1a998fb70410635c598dc1a1e"}, - {file = "fonttools-4.55.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:944228b86d472612d3b48bcc83b31c25c2271e63fdc74539adfcfa7a96d487fb"}, - {file = "fonttools-4.55.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f0e55f5da594b85f269cfbecd2f6bd3e07d0abba68870bc3f34854de4fa4678"}, - {file = "fonttools-4.55.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b1a6e576db0c83c1b91925bf1363478c4bb968dbe8433147332fb5782ce6190"}, - {file = "fonttools-4.55.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:616368b15716781bc84df5c2191dc0540137aaef56c2771eb4b89b90933f347a"}, - {file = "fonttools-4.55.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7bbae4f3915225c2c37670da68e2bf18a21206060ad31dfb95fec91ef641caa7"}, - {file = "fonttools-4.55.2-cp310-cp310-win32.whl", hash = "sha256:8b02b10648d69d67a7eb055f4d3eedf4a85deb22fb7a19fbd9acbae7c7538199"}, - {file = "fonttools-4.55.2-cp310-cp310-win_amd64.whl", hash = "sha256:bbea0ab841113ac8e8edde067e099b7288ffc6ac2dded538b131c2c0595d5f77"}, - {file = "fonttools-4.55.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d34525e8141286fa976e14806639d32294bfb38d28bbdb5f6be9f46a1cd695a6"}, - {file = "fonttools-4.55.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0ecd1c2b1c2ec46bb73685bc5473c72e16ed0930ef79bc2919ccadc43a99fb16"}, - {file = "fonttools-4.55.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9008438ad59e5a8e403a62fbefef2b2ff377eb3857d90a3f2a5f4d674ff441b2"}, - {file = "fonttools-4.55.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:131591ac8d7a47043aaf29581aba755ae151d46e49d2bf49608601efd71e8b4d"}, - {file = "fonttools-4.55.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4c83381c3e3e3d9caa25527c4300543578341f21aae89e4fbbb4debdda8d82a2"}, - 
{file = "fonttools-4.55.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:42aca564b575252fd9954ed0d91d97a24de24289a16ce8ff74ed0bdf5ecebf11"}, - {file = "fonttools-4.55.2-cp311-cp311-win32.whl", hash = "sha256:c6457f650ebe15baa17fc06e256227f0a47f46f80f27ec5a0b00160de8dc2c13"}, - {file = "fonttools-4.55.2-cp311-cp311-win_amd64.whl", hash = "sha256:5cfa67414d7414442a5635ff634384101c54f53bb7b0e04aa6a61b013fcce194"}, - {file = "fonttools-4.55.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:18f082445b8fe5e91c53e6184f4c1c73f3f965c8bcc614c6cd6effd573ce6c1a"}, - {file = "fonttools-4.55.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:27c0f91adbbd706e8acd1db73e3e510118e62d0ffb651864567dccc5b2339f90"}, - {file = "fonttools-4.55.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d8ccce035320d63dba0c35f52499322f5531dbe85bba1514c7cea26297e4c54"}, - {file = "fonttools-4.55.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96e126df9615df214ec7f04bebcf60076297fbc10b75c777ce58b702d7708ffb"}, - {file = "fonttools-4.55.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:508ebb42956a7a931c4092dfa2d9b4ffd4f94cea09b8211199090d2bd082506b"}, - {file = "fonttools-4.55.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c1b9de46ef7b683d50400abf9f1578eaceee271ff51c36bf4b7366f2be29f498"}, - {file = "fonttools-4.55.2-cp312-cp312-win32.whl", hash = "sha256:2df61d9fc15199cc86dad29f64dd686874a3a52dda0c2d8597d21f509f95c332"}, - {file = "fonttools-4.55.2-cp312-cp312-win_amd64.whl", hash = "sha256:d337ec087da8216a828574aa0525d869df0a2ac217a2efc1890974ddd1fbc5b9"}, - {file = "fonttools-4.55.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:10aff204e2edee1d312fa595c06f201adf8d528a3b659cfb34cd47eceaaa6a26"}, - {file = "fonttools-4.55.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:09fe922a3eff181fd07dd724cdb441fb6b9fc355fd1c0f1aa79aca60faf1fbdd"}, - {file = "fonttools-4.55.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:487e1e8b524143a799bda0169c48b44a23a6027c1bb1957d5a172a7d3a1dd704"}, - {file = "fonttools-4.55.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b1726872e09268bbedb14dc02e58b7ea31ecdd1204c6073eda4911746b44797"}, - {file = "fonttools-4.55.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6fc88cfb58b0cd7b48718c3e61dd0d0a3ee8e2c86b973342967ce09fbf1db6d4"}, - {file = "fonttools-4.55.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e857fe1859901ad8c5cab32e0eebc920adb09f413d2d73b74b677cf47b28590c"}, - {file = "fonttools-4.55.2-cp313-cp313-win32.whl", hash = "sha256:81ccd2b3a420b8050c7d9db3be0555d71662973b3ef2a1d921a2880b58957db8"}, - {file = "fonttools-4.55.2-cp313-cp313-win_amd64.whl", hash = "sha256:d559eb1744c7dcfa90ae60cb1a4b3595e898e48f4198738c321468c01180cd83"}, - {file = "fonttools-4.55.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6b5917ef79cac8300b88fd6113003fd01bbbbea2ea060a27b95d8f77cb4c65c2"}, - {file = "fonttools-4.55.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:663eba5615d6abaaf616432354eb7ce951d518e43404371bcc2b0694ef21e8d6"}, - {file = "fonttools-4.55.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:803d5cef5fc47f44f5084d154aa3d6f069bb1b60e32390c225f897fa19b0f939"}, - {file = "fonttools-4.55.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:8bc5f100de0173cc39102c0399bd6c3bd544bbdf224957933f10ee442d43cddd"}, - {file = "fonttools-4.55.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:3d9bbc1e380fdaf04ad9eabd8e3e6a4301eaf3487940893e9fd98537ea2e283b"}, - {file = "fonttools-4.55.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:42a9afedff07b6f75aa0f39b5e49922ac764580ef3efce035ca30284b2ee65c8"}, - {file = "fonttools-4.55.2-cp38-cp38-win32.whl", hash = "sha256:f1c76f423f1a241df08f87614364dff6e0b7ce23c962c1b74bd995ec7c0dad13"}, - {file = "fonttools-4.55.2-cp38-cp38-win_amd64.whl", hash = "sha256:25062b6ca03464dd5179fc2040fb19e03391b7cc49b9cc4f879312e638605c5c"}, - {file = "fonttools-4.55.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:d1100d8e665fe386a79cab59446992de881ea74d0d6c191bb988642692aa2421"}, - {file = "fonttools-4.55.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:dbdc251c5e472e5ae6bc816f9b82718b8e93ff7992e7331d6cf3562b96aa268e"}, - {file = "fonttools-4.55.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0bf24d2b02dbc9376d795a63062632ff73e3e9e60c0229373f500aed7e86dd7"}, - {file = "fonttools-4.55.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4ff250ed4ff05015dfd9cf2adf7570c7a383ca80f4d9732ac484a5ed0d8453c"}, - {file = "fonttools-4.55.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:44cf2a98aa661dbdeb8c03f5e405b074e2935196780bb729888639f5276067d9"}, - {file = "fonttools-4.55.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:22ef222740eb89d189bf0612eb98fbae592c61d7efeac51bfbc2a1592d469557"}, - {file = "fonttools-4.55.2-cp39-cp39-win32.whl", hash = "sha256:93f439ca27e55f585e7aaa04a74990acd983b5f2245e41d6b79f0a8b44e684d8"}, - {file = "fonttools-4.55.2-cp39-cp39-win_amd64.whl", hash = "sha256:627cf10d6f5af5bec6324c18a2670f134c29e1b7dce3fb62e8ef88baa6cba7a9"}, - {file = "fonttools-4.55.2-py3-none-any.whl", hash = "sha256:8e2d89fbe9b08d96e22c7a81ec04a4e8d8439c31223e2dc6f2f9fc8ff14bdf9f"}, - {file = "fonttools-4.55.2.tar.gz", hash = "sha256:45947e7b3f9673f91df125d375eb57b9a23f2a603f438a1aebf3171bffa7a205"}, + {file = "fonttools-4.55.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1dcc07934a2165ccdc3a5a608db56fb3c24b609658a5b340aee4ecf3ba679dc0"}, + {file = "fonttools-4.55.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f7d66c15ba875432a2d2fb419523f5d3d347f91f48f57b8b08a2dfc3c39b8a3f"}, + {file = "fonttools-4.55.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27e4ae3592e62eba83cd2c4ccd9462dcfa603ff78e09110680a5444c6925d841"}, + {file = "fonttools-4.55.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:62d65a3022c35e404d19ca14f291c89cc5890032ff04f6c17af0bd1927299674"}, + {file = "fonttools-4.55.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d342e88764fb201286d185093781bf6628bbe380a913c24adf772d901baa8276"}, + {file = "fonttools-4.55.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:dd68c87a2bfe37c5b33bcda0fba39b65a353876d3b9006fde3adae31f97b3ef5"}, + {file = "fonttools-4.55.3-cp310-cp310-win32.whl", hash = "sha256:1bc7ad24ff98846282eef1cbeac05d013c2154f977a79886bb943015d2b1b261"}, + {file = "fonttools-4.55.3-cp310-cp310-win_amd64.whl", hash = "sha256:b54baf65c52952db65df39fcd4820668d0ef4766c0ccdf32879b77f7c804d5c5"}, + {file = "fonttools-4.55.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8c4491699bad88efe95772543cd49870cf756b019ad56294f6498982408ab03e"}, + {file = "fonttools-4.55.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:5323a22eabddf4b24f66d26894f1229261021dacd9d29e89f7872dd8c63f0b8b"}, + {file = "fonttools-4.55.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5480673f599ad410695ca2ddef2dfefe9df779a9a5cda89503881e503c9c7d90"}, + {file = "fonttools-4.55.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da9da6d65cd7aa6b0f806556f4985bcbf603bf0c5c590e61b43aa3e5a0f822d0"}, + {file = "fonttools-4.55.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e894b5bd60d9f473bed7a8f506515549cc194de08064d829464088d23097331b"}, + {file = "fonttools-4.55.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:aee3b57643827e237ff6ec6d28d9ff9766bd8b21e08cd13bff479e13d4b14765"}, + {file = "fonttools-4.55.3-cp311-cp311-win32.whl", hash = "sha256:eb6ca911c4c17eb51853143624d8dc87cdcdf12a711fc38bf5bd21521e79715f"}, + {file = "fonttools-4.55.3-cp311-cp311-win_amd64.whl", hash = "sha256:6314bf82c54c53c71805318fcf6786d986461622dd926d92a465199ff54b1b72"}, + {file = "fonttools-4.55.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:f9e736f60f4911061235603a6119e72053073a12c6d7904011df2d8fad2c0e35"}, + {file = "fonttools-4.55.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7a8aa2c5e5b8b3bcb2e4538d929f6589a5c6bdb84fd16e2ed92649fb5454f11c"}, + {file = "fonttools-4.55.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07f8288aacf0a38d174445fc78377a97fb0b83cfe352a90c9d9c1400571963c7"}, + {file = "fonttools-4.55.3-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8d5e8916c0970fbc0f6f1bece0063363bb5857a7f170121a4493e31c3db3314"}, + {file = "fonttools-4.55.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ae3b6600565b2d80b7c05acb8e24d2b26ac407b27a3f2e078229721ba5698427"}, + {file = "fonttools-4.55.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:54153c49913f45065c8d9e6d0c101396725c5621c8aee744719300f79771d75a"}, + {file = "fonttools-4.55.3-cp312-cp312-win32.whl", hash = "sha256:827e95fdbbd3e51f8b459af5ea10ecb4e30af50221ca103bea68218e9615de07"}, + {file = "fonttools-4.55.3-cp312-cp312-win_amd64.whl", hash = "sha256:e6e8766eeeb2de759e862004aa11a9ea3d6f6d5ec710551a88b476192b64fd54"}, + {file = "fonttools-4.55.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a430178ad3e650e695167cb53242dae3477b35c95bef6525b074d87493c4bf29"}, + {file = "fonttools-4.55.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:529cef2ce91dc44f8e407cc567fae6e49a1786f2fefefa73a294704c415322a4"}, + {file = "fonttools-4.55.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e75f12c82127486fac2d8bfbf5bf058202f54bf4f158d367e41647b972342ca"}, + {file = "fonttools-4.55.3-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:859c358ebf41db18fb72342d3080bce67c02b39e86b9fbcf1610cca14984841b"}, + {file = "fonttools-4.55.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:546565028e244a701f73df6d8dd6be489d01617863ec0c6a42fa25bf45d43048"}, + {file = "fonttools-4.55.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:aca318b77f23523309eec4475d1fbbb00a6b133eb766a8bdc401faba91261abe"}, + {file = "fonttools-4.55.3-cp313-cp313-win32.whl", hash = "sha256:8c5ec45428edaa7022f1c949a632a6f298edc7b481312fc7dc258921e9399628"}, + {file = "fonttools-4.55.3-cp313-cp313-win_amd64.whl", hash = "sha256:11e5de1ee0d95af4ae23c1a138b184b7f06e0b6abacabf1d0db41c90b03d834b"}, + {file = 
"fonttools-4.55.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:caf8230f3e10f8f5d7593eb6d252a37caf58c480b19a17e250a63dad63834cf3"}, + {file = "fonttools-4.55.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b586ab5b15b6097f2fb71cafa3c98edfd0dba1ad8027229e7b1e204a58b0e09d"}, + {file = "fonttools-4.55.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8c2794ded89399cc2169c4d0bf7941247b8d5932b2659e09834adfbb01589aa"}, + {file = "fonttools-4.55.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf4fe7c124aa3f4e4c1940880156e13f2f4d98170d35c749e6b4f119a872551e"}, + {file = "fonttools-4.55.3-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:86721fbc389ef5cc1e2f477019e5069e8e4421e8d9576e9c26f840dbb04678de"}, + {file = "fonttools-4.55.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:89bdc5d88bdeec1b15af790810e267e8332d92561dce4f0748c2b95c9bdf3926"}, + {file = "fonttools-4.55.3-cp38-cp38-win32.whl", hash = "sha256:bc5dbb4685e51235ef487e4bd501ddfc49be5aede5e40f4cefcccabc6e60fb4b"}, + {file = "fonttools-4.55.3-cp38-cp38-win_amd64.whl", hash = "sha256:cd70de1a52a8ee2d1877b6293af8a2484ac82514f10b1c67c1c5762d38073e56"}, + {file = "fonttools-4.55.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:bdcc9f04b36c6c20978d3f060e5323a43f6222accc4e7fcbef3f428e216d96af"}, + {file = "fonttools-4.55.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c3ca99e0d460eff46e033cd3992a969658c3169ffcd533e0a39c63a38beb6831"}, + {file = "fonttools-4.55.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22f38464daa6cdb7b6aebd14ab06609328fe1e9705bb0fcc7d1e69de7109ee02"}, + {file = "fonttools-4.55.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed63959d00b61959b035c7d47f9313c2c1ece090ff63afea702fe86de00dbed4"}, + {file = "fonttools-4.55.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:5e8d657cd7326eeaba27de2740e847c6b39dde2f8d7cd7cc56f6aad404ddf0bd"}, + {file = "fonttools-4.55.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:fb594b5a99943042c702c550d5494bdd7577f6ef19b0bc73877c948a63184a32"}, + {file = "fonttools-4.55.3-cp39-cp39-win32.whl", hash = "sha256:dc5294a3d5c84226e3dbba1b6f61d7ad813a8c0238fceea4e09aa04848c3d851"}, + {file = "fonttools-4.55.3-cp39-cp39-win_amd64.whl", hash = "sha256:aedbeb1db64496d098e6be92b2e63b5fac4e53b1b92032dfc6988e1ea9134a4d"}, + {file = "fonttools-4.55.3-py3-none-any.whl", hash = "sha256:f412604ccbeee81b091b420272841e5ec5ef68967a9790e80bffd0e30b8e2977"}, + {file = "fonttools-4.55.3.tar.gz", hash = "sha256:3983313c2a04d6cc1fe9251f8fc647754cf49a61dac6cb1e7249ae67afaafc45"}, ] [package.extras] @@ -1239,13 +1268,13 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] [[package]] name = "google-api-python-client" -version = "2.154.0" +version = "2.155.0" description = "Google API Client Library for Python" optional = false python-versions = ">=3.7" files = [ - {file = "google_api_python_client-2.154.0-py2.py3-none-any.whl", hash = "sha256:a521bbbb2ec0ba9d6f307cdd64ed6e21eeac372d1bd7493a4ab5022941f784ad"}, - {file = "google_api_python_client-2.154.0.tar.gz", hash = "sha256:1b420062e03bfcaa1c79e2e00a612d29a6a934151ceb3d272fe150a656dc8f17"}, + {file = "google_api_python_client-2.155.0-py2.py3-none-any.whl", hash = "sha256:83fe9b5aa4160899079d7c93a37be306546a17e6686e2549bcc9584f1a229747"}, + {file = "google_api_python_client-2.155.0.tar.gz", hash = "sha256:25529f89f0d13abcf3c05c089c423fb2858ac16e0b3727543393468d0d7af67c"}, ] [package.dependencies] @@ -1257,13 +1286,13 
@@ uritemplate = ">=3.0.1,<5" [[package]] name = "google-auth" -version = "2.36.0" +version = "2.37.0" description = "Google Authentication Library" optional = false python-versions = ">=3.7" files = [ - {file = "google_auth-2.36.0-py2.py3-none-any.whl", hash = "sha256:51a15d47028b66fd36e5c64a82d2d57480075bccc7da37cde257fc94177a61fb"}, - {file = "google_auth-2.36.0.tar.gz", hash = "sha256:545e9618f2df0bcbb7dcbc45a546485b1212624716975a1ea5ae8149ce769ab1"}, + {file = "google_auth-2.37.0-py2.py3-none-any.whl", hash = "sha256:42664f18290a6be591be5329a96fe30184be1a1badb7292a7f686a9659de9ca0"}, + {file = "google_auth-2.37.0.tar.gz", hash = "sha256:0054623abf1f9c83492c63d3f47e77f0a544caa3d40b2d98e099a611c2dd5d00"}, ] [package.dependencies] @@ -1274,6 +1303,7 @@ rsa = ">=3.1.4,<5" [package.extras] aiohttp = ["aiohttp (>=3.6.2,<4.0.0.dev0)", "requests (>=2.20.0,<3.0.0.dev0)"] enterprise-cert = ["cryptography", "pyopenssl"] +pyjwt = ["cryptography (>=38.0.3)", "pyjwt (>=2.0)"] pyopenssl = ["cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] reauth = ["pyu2f (>=0.1.5)"] requests = ["requests (>=2.20.0,<3.0.0.dev0)"] @@ -1737,6 +1767,62 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "ipython" +version = "8.18.1" +description = "IPython: Productive Interactive Computing" +optional = false +python-versions = ">=3.9" +files = [ + {file = "ipython-8.18.1-py3-none-any.whl", hash = "sha256:e8267419d72d81955ec1177f8a29aaa90ac80ad647499201119e2f05e99aa397"}, + {file = "ipython-8.18.1.tar.gz", hash = "sha256:ca6f079bb33457c66e233e4580ebfc4128855b4cf6370dddd73842a9563e8a27"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +decorator = "*" +exceptiongroup = {version = "*", markers = "python_version < \"3.11\""} +jedi = ">=0.16" +matplotlib-inline = "*" +pexpect = {version = ">4.3", markers = "sys_platform != \"win32\""} +prompt-toolkit = ">=3.0.41,<3.1.0" +pygments = ">=2.4.0" +stack-data = "*" +traitlets = ">=5" +typing-extensions = {version = "*", markers = "python_version < \"3.10\""} + +[package.extras] +all = ["black", "curio", "docrepr", "exceptiongroup", "ipykernel", "ipyparallel", "ipywidgets", "matplotlib", "matplotlib (!=3.2.0)", "nbconvert", "nbformat", "notebook", "numpy (>=1.22)", "pandas", "pickleshare", "pytest (<7)", "pytest (<7.1)", "pytest-asyncio (<0.22)", "qtconsole", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "stack-data", "testpath", "trio", "typing-extensions"] +black = ["black"] +doc = ["docrepr", "exceptiongroup", "ipykernel", "matplotlib", "pickleshare", "pytest (<7)", "pytest (<7.1)", "pytest-asyncio (<0.22)", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "stack-data", "testpath", "typing-extensions"] +kernel = ["ipykernel"] +nbconvert = ["nbconvert"] +nbformat = ["nbformat"] +notebook = ["ipywidgets", "notebook"] +parallel = ["ipyparallel"] +qtconsole = ["qtconsole"] +test = ["pickleshare", "pytest (<7.1)", "pytest-asyncio (<0.22)", "testpath"] +test-extra = ["curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.22)", "pandas", "pickleshare", "pytest (<7.1)", "pytest-asyncio (<0.22)", "testpath", "trio"] + +[[package]] +name = "jedi" +version = "0.19.2" +description = "An autocompletion tool for Python that can be used for text editors." 
+optional = false +python-versions = ">=3.6" +files = [ + {file = "jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9"}, + {file = "jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0"}, +] + +[package.dependencies] +parso = ">=0.8.4,<0.9.0" + +[package.extras] +docs = ["Jinja2 (==2.11.3)", "MarkupSafe (==1.1.1)", "Pygments (==2.8.1)", "alabaster (==0.7.12)", "babel (==2.9.1)", "chardet (==4.0.0)", "commonmark (==0.8.1)", "docutils (==0.17.1)", "future (==0.18.2)", "idna (==2.10)", "imagesize (==1.2.0)", "mock (==1.0.1)", "packaging (==20.9)", "pyparsing (==2.4.7)", "pytz (==2021.1)", "readthedocs-sphinx-ext (==2.1.4)", "recommonmark (==0.5.0)", "requests (==2.25.1)", "six (==1.15.0)", "snowballstemmer (==2.1.0)", "sphinx (==1.8.5)", "sphinx-rtd-theme (==0.4.3)", "sphinxcontrib-serializinghtml (==1.1.4)", "sphinxcontrib-websupport (==1.2.4)", "urllib3 (==1.26.4)"] +qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"] +testing = ["Django", "attrs", "colorama", "docopt", "pytest (<9.0.0)"] + [[package]] name = "jinja2" version = "3.1.4" @@ -1864,6 +1950,24 @@ files = [ [package.dependencies] attrs = ">=19.2.0" +[[package]] +name = "jsonpickle" +version = "4.0.0" +description = "jsonpickle encodes/decodes any Python object to/from JSON" +optional = false +python-versions = ">=3.8" +files = [ + {file = "jsonpickle-4.0.0-py3-none-any.whl", hash = "sha256:53730b9e094bc41f540bfdd25eaf6e6cf43811590e9e1477abcec44b866ddcd9"}, + {file = "jsonpickle-4.0.0.tar.gz", hash = "sha256:fc670852b204d77601b08f8f9333149ac37ab6d3fe4e6ed3b578427291f63736"}, +] + +[package.extras] +cov = ["pytest-cov"] +dev = ["black", "pyupgrade"] +docs = ["furo", "rst.linker (>=1.9)", "sphinx (>=3.5)"] +packaging = ["build", "setuptools (>=61.2)", "setuptools-scm[toml] (>=6.0)", "twine"] +testing = ["PyYAML", "atheris (>=2.3.0,<2.4.0)", "bson", "ecdsa", "feedparser", "gmpy2", "numpy", "pandas", "pymongo", "pytest (>=6.0,!=8.1.*)", "pytest-benchmark", "pytest-benchmark[histogram]", "pytest-checkdocs (>=1.2.3)", "pytest-enabler (>=1.0.1)", "pytest-ruff (>=0.2.1)", "scikit-learn", "scipy", "scipy (>=1.9.3)", "simplejson", "sqlalchemy", "ujson"] + [[package]] name = "kiwisolver" version = "1.4.7" @@ -2173,6 +2277,20 @@ python-dateutil = ">=2.7" [package.extras] dev = ["meson-python (>=0.13.1)", "numpy (>=1.25)", "pybind11 (>=2.6,!=2.13.3)", "setuptools (>=64)", "setuptools_scm (>=7)"] +[[package]] +name = "matplotlib-inline" +version = "0.1.7" +description = "Inline Matplotlib backend for Jupyter" +optional = false +python-versions = ">=3.8" +files = [ + {file = "matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca"}, + {file = "matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90"}, +] + +[package.dependencies] +traitlets = "*" + [[package]] name = "mpmath" version = "1.3.0" @@ -2672,13 +2790,13 @@ httpx = ">=0.27.0,<0.28.0" [[package]] name = "openai" -version = "1.57.1" +version = "1.57.3" description = "The official Python library for the openai API" optional = false python-versions = ">=3.8" files = [ - {file = "openai-1.57.1-py3-none-any.whl", hash = "sha256:3865686c927e93492d1145938d4a24b634951531c4b2769d43ca5dbd4b25d8fd"}, - {file = "openai-1.57.1.tar.gz", hash = "sha256:a95f22e04ab3df26e64a15d958342265e802314131275908b3b3e36f8c5d4377"}, + {file = 
"openai-1.57.3-py3-none-any.whl", hash = "sha256:c4034a5676eb252ef2e0ed1f46d040ca3bdde24bb61b432f50bb0b38d0cf9ecf"}, + {file = "openai-1.57.3.tar.gz", hash = "sha256:2c98ca6532b30d8bc5029974d2fcbd793b650009c2b014f47ffd4f9fdfc1f9eb"}, ] [package.dependencies] @@ -2816,6 +2934,35 @@ files = [ [package.extras] dev = ["jinja2"] +[[package]] +name = "parso" +version = "0.8.4" +description = "A Python Parser" +optional = false +python-versions = ">=3.6" +files = [ + {file = "parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18"}, + {file = "parso-0.8.4.tar.gz", hash = "sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d"}, +] + +[package.extras] +qa = ["flake8 (==5.0.4)", "mypy (==0.971)", "types-setuptools (==67.2.0.1)"] +testing = ["docopt", "pytest"] + +[[package]] +name = "pexpect" +version = "4.9.0" +description = "Pexpect allows easy control of interactive console applications." +optional = false +python-versions = "*" +files = [ + {file = "pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523"}, + {file = "pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f"}, +] + +[package.dependencies] +ptyprocess = ">=0.5" + [[package]] name = "pgvector" version = "0.3.6" @@ -2990,6 +3137,20 @@ nodeenv = ">=0.11.1" pyyaml = ">=5.1" virtualenv = ">=20.10.0" +[[package]] +name = "prompt-toolkit" +version = "3.0.48" +description = "Library for building powerful interactive command lines in Python" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "prompt_toolkit-3.0.48-py3-none-any.whl", hash = "sha256:f49a827f90062e411f1ce1f854f2aedb3c23353244f8108b89283587397ac10e"}, + {file = "prompt_toolkit-3.0.48.tar.gz", hash = "sha256:d6623ab0477a80df74e646bdbc93621143f5caf104206aa29294d53de1a03d90"}, +] + +[package.dependencies] +wcwidth = "*" + [[package]] name = "propcache" version = "0.2.1" @@ -3118,6 +3279,31 @@ files = [ {file = "protobuf-4.25.5.tar.gz", hash = "sha256:7f8249476b4a9473645db7f8ab42b02fe1488cbe5fb72fddd445e0665afd8584"}, ] +[[package]] +name = "ptyprocess" +version = "0.7.0" +description = "Run a subprocess in a pseudo terminal" +optional = false +python-versions = "*" +files = [ + {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, + {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"}, +] + +[[package]] +name = "pure-eval" +version = "0.2.3" +description = "Safely evaluate AST nodes without side effects" +optional = false +python-versions = "*" +files = [ + {file = "pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0"}, + {file = "pure_eval-0.2.3.tar.gz", hash = "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42"}, +] + +[package.extras] +tests = ["pytest"] + [[package]] name = "py" version = "1.11.0" @@ -3351,6 +3537,20 @@ files = [ [package.dependencies] typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" +[[package]] +name = "pygments" +version = "2.18.0" +description = "Pygments is a syntax highlighting package written in Python." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a"}, + {file = "pygments-2.18.0.tar.gz", hash = "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199"}, +] + +[package.extras] +windows-terminal = ["colorama (>=0.4.6)"] + [[package]] name = "pyjwt" version = "2.10.1" @@ -3487,6 +3687,22 @@ files = [ {file = "pytz-2024.2.tar.gz", hash = "sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a"}, ] +[[package]] +name = "pyvis" +version = "0.3.2" +description = "A Python network graph visualization library" +optional = false +python-versions = ">3.6" +files = [ + {file = "pyvis-0.3.2-py3-none-any.whl", hash = "sha256:5720c4ca8161dc5d9ab352015723abb7a8bb8fb443edeb07f7a322db34a97555"}, +] + +[package.dependencies] +ipython = ">=5.3.0" +jinja2 = ">=2.9.6" +jsonpickle = ">=1.4.1" +networkx = ">=1.11" + [[package]] name = "pywin32" version = "308" @@ -3908,6 +4124,25 @@ postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] pymysql = ["pymysql"] sqlcipher = ["sqlcipher3_binary"] +[[package]] +name = "stack-data" +version = "0.6.3" +description = "Extract data from python stack frames and tracebacks for informative displays" +optional = false +python-versions = "*" +files = [ + {file = "stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695"}, + {file = "stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9"}, +] + +[package.dependencies] +asttokens = ">=2.1.0" +executing = ">=1.2.0" +pure-eval = "*" + +[package.extras] +tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] + [[package]] name = "sympy" version = "1.13.1" @@ -4173,6 +4408,21 @@ notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] +[[package]] +name = "traitlets" +version = "5.14.3" +description = "Traitlets Python configuration system" +optional = false +python-versions = ">=3.8" +files = [ + {file = "traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f"}, + {file = "traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7"}, +] + +[package.extras] +docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] +test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,<8.2)", "pytest-mock", "pytest-mypy-testing"] + [[package]] name = "triton" version = "3.1.0" @@ -4345,6 +4595,17 @@ platformdirs = ">=3.9.1,<5" docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] +[[package]] +name = "wcwidth" +version = "0.2.13" +description = "Measures the displayed width of unicode strings in a terminal" +optional = false +python-versions = "*" +files = [ + {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, + {file = "wcwidth-0.2.13.tar.gz", hash = 
"sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, +] + [[package]] name = "werkzeug" version = "3.1.3" @@ -4628,4 +4889,4 @@ torch = ["torch"] [metadata] lock-version = "2.0" python-versions = ">=3.9, <4.0" -content-hash = "a32560a472d4f6230349b9c13273f2462d0bdd560600f051f220a4cb41eeed7f" +content-hash = "86d5f192585121c048dae33edff42527cf40f8a0398e1c3e5b60c1c8ab0af363" diff --git a/adalflow/pyproject.toml b/adalflow/pyproject.toml index bf6c9c48..08947d81 100644 --- a/adalflow/pyproject.toml +++ b/adalflow/pyproject.toml @@ -96,6 +96,7 @@ tensorboardx = "^2.6.2.2" matplotlib = "^3.9.1" azure-identity = "^1.18.0" azure-core = "^1.31.0" +pyvis = "^0.3.2" [tool.poetry.group.extra.dependencies] @@ -127,6 +128,10 @@ datasets = ["datasets"] name = "nvidia-pypi" priority = "supplemental" url = "https://pypi.nvidia.com" +# [[tool.poetry.source]] +# name = "nvidia-pypi" +# priority = "supplemental" +# url = "https://pypi.nvidia.com" [build-system] diff --git a/adalflow/tests/test_componentlist.py b/adalflow/tests/test_componentlist.py new file mode 100644 index 00000000..ee0bef56 --- /dev/null +++ b/adalflow/tests/test_componentlist.py @@ -0,0 +1,127 @@ +import unittest + +# Assuming `Component` and `ComponentList` are defined in a module named `adalflow.core` +from adalflow.core import Component, ComponentList + + +class MockComponent(Component): + """A mock component used for testing purposes.""" + + def __init__(self, value): + super().__init__() + self.value = value + + def __repr__(self): + return f"MockComponent({self.value})" + + +class TestComponentList(unittest.TestCase): + def setUp(self): + """Create some mock components for testing.""" + self.c1 = MockComponent(1) + self.c2 = MockComponent(2) + self.c3 = MockComponent(3) + + def test_initialization(self): + """Test that ComponentList initializes correctly with components.""" + cl = ComponentList([self.c1, self.c2]) + self.assertEqual(len(cl), 2) + self.assertIs(cl[0], self.c1) + self.assertIs(cl[1], self.c2) + + def test_append(self): + """Test appending a new component to the list.""" + cl = ComponentList([self.c1]) + cl.append(self.c2) + self.assertEqual(len(cl), 2) + self.assertIs(cl[1], self.c2) + + def test_extend(self): + """Test extending the list with multiple components.""" + cl = ComponentList([self.c1]) + cl.extend([self.c2, self.c3]) + self.assertEqual(len(cl), 3) + self.assertIs(cl[1], self.c2) + self.assertIs(cl[2], self.c3) + + def test_indexing(self): + """Test retrieving components by index.""" + cl = ComponentList([self.c1, self.c2, self.c3]) + self.assertIs(cl[0], self.c1) + self.assertIs(cl[2], self.c3) + + def test_slicing(self): + """Test slicing the list of components.""" + cl = ComponentList([self.c1, self.c2, self.c3]) + sliced = cl[1:] + self.assertEqual(len(sliced), 2) + self.assertIs(sliced[0], self.c2) + self.assertIs(sliced[1], self.c3) + + def test_insert(self): + """Test inserting a component at a specific index.""" + cl = ComponentList([self.c1, self.c3]) + cl.insert(1, self.c2) + self.assertEqual(len(cl), 3) + self.assertIs(cl[1], self.c2) + + def test_pop(self): + """Test removing and returning a component.""" + cl = ComponentList([self.c1, self.c2, self.c3]) + component = cl.pop(1) + self.assertIs(component, self.c2) + self.assertEqual(len(cl), 2) + + def test_delitem(self): + """Test deleting components by index and slice.""" + cl = ComponentList([self.c1, self.c2, self.c3]) + del cl[1] + self.assertEqual(len(cl), 2) + self.assertIs(cl[0], self.c1) + self.assertIs(cl[1], 
self.c3) + + cl = ComponentList([self.c1, self.c2, self.c3]) + del cl[1:] + self.assertEqual(len(cl), 1) + self.assertIs(cl[0], self.c1) + + def test_add(self): + """Test adding two ComponentLists.""" + cl1 = ComponentList([self.c1]) + cl2 = ComponentList([self.c2, self.c3]) + cl3 = cl1 + cl2 + self.assertEqual(len(cl3), 3) + self.assertIs(cl3[0], self.c1) + self.assertIs(cl3[1], self.c2) + self.assertIs(cl3[2], self.c3) + + def test_iadd(self): + """Test adding components using the += operator.""" + cl = ComponentList([self.c1]) + cl += [self.c2, self.c3] + self.assertEqual(len(cl), 3) + self.assertIs(cl[1], self.c2) + self.assertIs(cl[2], self.c3) + + def test_repr(self): + """Test the custom __repr__ implementation.""" + cl = ComponentList([MockComponent(1), MockComponent(1), MockComponent(2)]) + expected = ( + "ComponentList(\n (0-1): 2 x MockComponent(1)\n (2): MockComponent(2)\n)" + ) + self.assertEqual(repr(cl), expected) + + def test_len(self): + """Test the length of the ComponentList.""" + cl = ComponentList([self.c1, self.c2]) + self.assertEqual(len(cl), 2) + + def test_iter(self): + """Test iterating over the components.""" + cl = ComponentList([self.c1, self.c2, self.c3]) + components = list(iter(cl)) + self.assertEqual(components, [self.c1, self.c2, self.c3]) + + +if __name__ == "__main__": + unittest.main() diff --git a/adalflow/tests/test_parameter_text_grad.py b/adalflow/tests/test_parameter_text_grad.py index f3ea2c1c..91cf4dc9 100644 --- a/adalflow/tests/test_parameter_text_grad.py +++ b/adalflow/tests/test_parameter_text_grad.py @@ -29,7 +29,9 @@ def setUp(self): ) def test_get_gradient_text(self): - expected_output = """1. + expected_output = """Batch size: 1 + +1. Conversation context Gradient 2""" @@ -79,7 +81,7 @@ def test_update_prompt(self): result = tgd.llm_optimizer.get_prompt(**user_prompt_kwargs) # Check if each variable value is in the generated output - self.assertIn("Role description", result) + # self.assertIn("Role description", result) # self.assertIn("short value", result) self.assertIn("gradient and context text", result) # self.assertIn("", result) diff --git a/benchmarks/hotpot_qa/adal_train.py b/benchmarks/hotpot_qa/_adal_train.py similarity index 99% rename from benchmarks/hotpot_qa/adal_train.py rename to benchmarks/hotpot_qa/_adal_train.py index 4162bc98..e397cf0f 100644 --- a/benchmarks/hotpot_qa/adal_train.py +++ b/benchmarks/hotpot_qa/_adal_train.py @@ -1,3 +1,4 @@ +"deprecated" """We will use dspy's retriever to keep that the same and only use our generator and optimizer""" import dspy @@ -22,9 +23,9 @@ def load_datasets(): - trainset = HotPotQA(split="train", size=20) - valset = HotPotQA(split="val", size=50) - testset = HotPotQA(split="test", size=50) # to keep the same as the dspy + trainset = HotPotQA(split="train", size=20) # 20 + valset = HotPotQA(split="val", size=50) # 50 + testset = HotPotQA(split="test", size=50) # to keep the same as the dspy #50 print(f"trainset, valset: {len(trainset)}, {len(valset)}, example: {trainset[0]}") return trainset, valset, testset diff --git a/benchmarks/hotpot_qa/adal_exp/build_multi_hop_rag.py b/benchmarks/hotpot_qa/adal_exp/build_multi_hop_rag.py new file mode 100644 index 00000000..cebcfdf2 --- /dev/null +++ b/benchmarks/hotpot_qa/adal_exp/build_multi_hop_rag.py @@ -0,0 +1,534 @@ +"""We will use dspy's retriever to keep that the same and only use our generator and optimizer""" + +import dspy +from typing import List +from dataclasses import dataclass, field + +import adalflow as adal +from 
adalflow.optim.parameter import Parameter, ParameterType + + +from adalflow.core.retriever import Retriever + +from benchmarks.hotpot_qa.adal_exp.build_vanilla_rag import DspyRetriever +from adalflow.utils.logger import printc + +colbertv2_wiki17_abstracts = dspy.ColBERTv2( + url="http://20.102.90.50:2017/wiki17_abstracts" +) + +dspy.settings.configure(rm=colbertv2_wiki17_abstracts) + + +# task pipeline + + +# dspy format +# Follow the following format. +# Context: may contain relevant facts +# Question: ${question} +# Reasoning: Let's think step by step in order to ${produce the query}. We ... +# Query: ${query} +@dataclass +class QueryRewritterData(adal.DataClass): + reasoning: str = field( + metadata={"desc": "The reasoning to produce the query"}, + ) + query: str = field( + metadata={"desc": "The query you produced"}, + ) + + __output_fields__ = ["reasoning", "query"] + + +query_template = """ +{{task_desc_str}} + +{{output_format_str}} +{# Few shot demos #} +{% if few_shot_demos is not none %} +Here are some examples: +{{few_shot_demos}} +{% endif %} + + +Context: {{context}} +Question: {{question}} + +""" + + +class DeduplicateList(adal.GradComponent): + def __init__(self): + super().__init__() + + def call(self, exisiting_list: List[str], new_list: List[str]) -> List[str]: + + seen = set() + return [x for x in exisiting_list + new_list if not (x in seen or seen.add(x))] + + def backward(self, *args, **kwargs): + + printc(f"DeduplicateList backward: {args}", "yellow") + return super().backward(*args, **kwargs) + + +# User customize an auto-grad operator +# Need this to be a GradComponent + + +# NOTE: deprecated +class MultiHopRetriever(adal.Retriever): + def __init__(self, model_client, model_kwargs, passages_per_hop=3, max_hops=2): + super().__init__() + + self.passages_per_hop = passages_per_hop + self.max_hops = max_hops + + self.data_parser = adal.DataClassParser( + data_class=QueryRewritterData, return_data_class=True, format_type="json" + ) + + # Grad Component + self.query_generators: List[adal.Generator] = [] + for i in range(self.max_hops): + self.query_generators.append( + adal.Generator( + name=f"query_generator_{i}", + model_client=model_client, + model_kwargs=model_kwargs, + prompt_kwargs={ + "few_shot_demos": Parameter( + name="few_shot_demos_1", + data=None, + role_desc="To provide few shot demos to the language model", + requires_opt=True, + param_type=ParameterType.DEMOS, + ), + "task_desc_str": Parameter( + name="task_desc_str", + data="""Write a simple search query that will help answer a complex question. + +You will receive a context(may contain relevant facts) and a question. 
+Think step by step.""", + role_desc="Task description for the language model", + requires_opt=True, + param_type=ParameterType.PROMPT, + ), + "output_format_str": self.data_parser.get_output_format_str(), + }, + template=query_template, + output_processors=self.data_parser, + use_cache=True, + ) + ) + self.retriever = DspyRetriever(top_k=passages_per_hop) + self.deduplicater = DeduplicateList() + + @staticmethod + def context_to_str(context: List[str]) -> str: + return "\n".join(context) + + @staticmethod + def deduplicate(seq: list[str]) -> list[str]: + """ + Source: https://stackoverflow.com/a/480227/1493011 + """ + + seen = set() + return [x for x in seq if not (x in seen or seen.add(x))] + + def call(self, *, question: str, id: str = None) -> adal.RetrieverOutput: + context = [] + print(f"question: {question}") + for i in range(self.max_hops): + gen_out = self.query_generators[i]( + prompt_kwargs={ + "context": self.context_to_str(context), + "question": question, + }, + id=id, + ) + + query = gen_out.data.query if gen_out.data and gen_out.data.query else None + + print(f"query {i}: {query}") + + retrieve_out = self.retriever.call(input=query) + passages = retrieve_out[0].documents + context = self.deduplicate(context + passages) + out = [adal.RetrieverOutput(documents=context, query=query, doc_indices=[])] + return out + + def forward(self, *, question: str, id: str = None) -> adal.Parameter: + # assemble the foundamental building blocks + context = [] + print(f"question: {question}") + # 1. make question a parameter as generator does not have it yet + # can create the parameter at the leaf, but not the intermediate nodes + question_param = adal.Parameter( + name="question", + data=question, + role_desc="The question to be answered", + requires_opt=True, + param_type=ParameterType.INPUT, + ) + context_param = adal.Parameter( + name="context", + data=context, + role_desc="The context to be used for the query", + requires_opt=True, + param_type=ParameterType.INPUT, + ) + context_param.add_successor_map_fn( + successor=self.query_generators[0], + map_fn=lambda x: self.context_to_str(x.data), + ) + + for i in range(self.max_hops): + + gen_out = self.query_generators[i].forward( + prompt_kwargs={ + "context": context_param, + "question": question_param, + }, + id=id, + ) + + success_map_fn = lambda x: ( # noqa E731 + x.full_response.data.query + if x.full_response + and x.full_response.data + and x.full_response.data.query + else None + ) + print(f"query {i}: {success_map_fn(gen_out)}") + + gen_out.add_successor_map_fn( + successor=self.retriever, map_fn=success_map_fn + ) + + retrieve_out = self.retriever.forward(input=gen_out) + + def retrieve_out_map_fn(x: adal.Parameter): + return x.data[0].documents if x.data and x.data[0].documents else [] + + print(f"retrieve_out: {retrieve_out}") + + retrieve_out.add_successor_map_fn( + successor=self.deduplicater, map_fn=retrieve_out_map_fn + ) + + context_param = self.deduplicater.forward( + exisiting_list=context_param, new_list=retrieve_out + ) + + context_param.param_type = ParameterType.RETRIEVER_OUTPUT + + return context_param + + +class MultiHopRetriever2(adal.Retriever): + def __init__(self, model_client, model_kwargs, passages_per_hop=3, max_hops=2): + super().__init__() + + self.passages_per_hop = passages_per_hop + self.max_hops = max_hops + + self.data_parser = adal.DataClassParser( + data_class=QueryRewritterData, return_data_class=True, format_type="json" + ) + + # Grad Component + # self.query_generators: List[adal.Generator] = [] 
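+        # NOTE: the newly added ComponentList is used here instead of a plain Python list
+        # so that each query generator is registered as a subcomponent: it then shows up in
+        # named_components() and its prompt parameters stay visible to the optimizer,
+        # mirroring how torch.nn.ModuleList registers submodules.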
+ self.query_generators: adal.ComponentList[adal.Generator] = adal.ComponentList() + self.retrievers: List[Retriever] = [] + self.deduplicaters: List[adal.GradComponent] = [] + for i in range(self.max_hops): + self.query_generators.append( + adal.Generator( + name=f"query_generator_{i}", + model_client=model_client, + model_kwargs=model_kwargs, + prompt_kwargs={ + "few_shot_demos": Parameter( + name=f"few_shot_demos_{i}", + data=None, + role_desc="To provide few shot demos to the language model", + requires_opt=True, + param_type=ParameterType.DEMOS, + ), + "task_desc_str": Parameter( + name="task_desc_str", + data="""Write a simple search query that will help answer a complex question. + +You will receive a context(may contain relevant facts) and a question. +Think step by step.""", + role_desc="Task description for the language model", + requires_opt=True, + param_type=ParameterType.PROMPT, + ), + "output_format_str": self.data_parser.get_output_format_str(), + }, + template=query_template, + output_processors=self.data_parser, + use_cache=True, + ) + ) + self.retrievers.append(DspyRetriever(top_k=passages_per_hop)) + self.deduplicaters.append(DeduplicateList()) + + @staticmethod + def context_to_str(context: List[str]) -> str: + return "\n".join(context) + + @staticmethod + def deduplicate(seq: list[str]) -> list[str]: + """ + Source: https://stackoverflow.com/a/480227/1493011 + """ + + seen = set() + return [x for x in seq if not (x in seen or seen.add(x))] + + # def call(self, *, question: str, id: str = None) -> adal.RetrieverOutput: + # context = [] + # print(f"question: {question}") + # for i in range(self.max_hops): + # gen_out = self.query_generators[i]( + # prompt_kwargs={ + # "context": self.context_to_str(context), + # "question": question, + # }, + # id=id, + # ) + + # query = gen_out.data.query if gen_out.data and gen_out.data.query else None + + # print(f"query {i}: {query}") + + # retrieve_out = self.retrievers[i].call(input=query) + # passages = retrieve_out[0].documents + # context = self.deduplicate(context + passages) + # out = [adal.RetrieverOutput(documents=context, query=query, doc_indices=[])] + # return out + + # TODO: simplify and avoid the need where users need to write two methods (call and forward) + def call(self, *, input: str, id: str = None) -> List[adal.RetrieverOutput]: + # assemble the foundamental building blocks + printc(f"question: {input}", "yellow") + out = self.forward(input=input, id=id) + + if not isinstance(out, adal.Parameter): + raise ValueError("The output should be a parameter") + + return out.data # or full response its up to users + + def forward(self, *, input: str, id: str = None) -> adal.Parameter: + # assemble the foundamental building blocks + printc(f"question: {input}", "yellow") + context = [] + + queries: List[str] = [] + + for i in range(self.max_hops): + + gen_out = self.query_generators[i].forward( + prompt_kwargs={ + "context": context, # can be a list or a parameter + "question": adal.Parameter( + name="question", + data=input, + role_desc="The question to be answered", + requires_opt=False, + param_type=ParameterType.INPUT, + ), + }, + id=id, + ) + + success_map_fn = lambda x: ( # noqa E731 + x.full_response.data.query + if x.full_response + and x.full_response.data + and x.full_response.data.query + else ( + x.full_response.raw_response + if x.full_response and x.full_response.raw_response + else None + ) + ) + print(f"query {i}: {success_map_fn(gen_out)}") + + queries.append(success_map_fn(gen_out)) + + 
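+            # Descriptive note: add_successor_map_fn tells the next node (the retriever) how to
+            # read this Parameter; the map_fn here appears to extract the plain query string from
+            # the generator's full_response before it is passed on as the retriever's input.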
gen_out.add_successor_map_fn( + successor=self.retrievers[i], map_fn=success_map_fn + ) + + if success_map_fn(gen_out) is None: + raise ValueError(f"The query is None, please check the generator {i}") + + retrieve_out = self.retrievers[i].forward(input=gen_out, id=id) + + def retrieve_out_map_fn(x: adal.Parameter): + return x.data[0].documents if x.data and x.data[0].documents else [] + + # print(f"retrieve_out: {retrieve_out}") + + retrieve_out.add_successor_map_fn( + successor=self.deduplicaters[i], map_fn=retrieve_out_map_fn + ) + + context = self.deduplicaters[i].forward( + exisiting_list=context, new_list=retrieve_out + ) + + context.param_type = ParameterType.RETRIEVER_OUTPUT + + def context_to_retrover_output(x): + return [ + adal.RetrieverOutput( + documents=x.data, query=[input] + queries, doc_indices=[] + ) + ] + + context.data = context_to_retrover_output(context) + + printc(f"MultiHopRetriever2 grad fn: {context.grad_fn}", "yellow") + + return context + + def backward(self, *args, **kwargs): + + printc(f"MultiHopRetriever2 backward: {args}", "yellow") + super().backward(*args, **kwargs) + return + + +from benchmarks.hotpot_qa.adal_exp.build_vanilla_rag import VanillaRAG + + +class MultiHopRAG(VanillaRAG): + def __init__( + self, passages_per_hop=3, max_hops=2, model_client=None, model_kwargs=None + ): + super().__init__( + passages_per_hop=passages_per_hop, + model_client=model_client, + model_kwargs=model_kwargs, + ) + self.retriever = MultiHopRetriever2( + model_client=model_client, + model_kwargs=model_kwargs, + passages_per_hop=passages_per_hop, + max_hops=max_hops, + ) + + +def test_multi_hop_retriever(): + + from use_cases.config import ( + gpt_3_model, + ) + + multi_hop_retriever = MultiHopRetriever( + **gpt_3_model, + passages_per_hop=3, + max_hops=2, + ) + + question = "How many storeys are in the castle that David Gregory inherited?" + + # eval mode + output = multi_hop_retriever.call(question=question, id="1") + print(output) + + # train mode + multi_hop_retriever.train() + output = multi_hop_retriever.forward(question=question, id="1") + print(output) + output.draw_graph() + + +def test_multi_hop_retriever2(): + + from use_cases.config import ( + gpt_3_model, + ) + + multi_hop_retriever = MultiHopRetriever2( + **gpt_3_model, + passages_per_hop=3, + max_hops=2, + ) + + question = "How many storeys are in the castle that David Gregory inherited?" + + # eval mode + # output = multi_hop_retriever.call(question=question, id="1") + # print(output) + + # train mode + multi_hop_retriever.train() + output = multi_hop_retriever.forward(input=question, id="1") + # print(output) + output.draw_graph(full_trace=True) + + # multi_hop_retriever.eval() + # output = multi_hop_retriever.call(input=question, id="1") + # print(output) + + +def test_multi_hop_rag(): + + from use_cases.config import ( + gpt_3_model, + ) + + adal.get_logger(level="DEBUG") + + task = MultiHopRAG( + **gpt_3_model, + passages_per_hop=3, + max_hops=2, + ) + print(f"task: {task}") + + for name, comp in task.named_components(): + + if isinstance(comp, adal.Generator): + print(f"name: {name}") + print(f"comp: {comp }") + return + + # test the retriever + + question = "How many storeys are in the castle that David Gregory inherited?" 
+ + task.train() + + # id = "1" + + # retriever_out = task.retriever(input=question, id=id) + + # print(f"retriever_out: {retriever_out}") + + # test the forward function + generator_out = task.forward(question=question, id="1") + print(f"generator_out: {generator_out}") + + generator_out.draw_graph() + + # task.eval() + # generator_out = task.call(question=question, id="1") + # print(f"generator_out: {generator_out}") + + +if __name__ == "__main__": + ### Try the minimum effort to test on any task + + # get_logger(level="DEBUG") + # test_multi_hop_retriever() + # test_multi_hop_retriever2() + test_multi_hop_rag() diff --git a/benchmarks/hotpot_qa/adal_exp/build_vanilla_rag.py b/benchmarks/hotpot_qa/adal_exp/build_vanilla_rag.py index 7e66ca9b..3eae0598 100644 --- a/benchmarks/hotpot_qa/adal_exp/build_vanilla_rag.py +++ b/benchmarks/hotpot_qa/adal_exp/build_vanilla_rag.py @@ -108,10 +108,15 @@ def __init__(self, top_k: int = 3): self.top_k = top_k self.dspy_retriever = dspy.Retrieve(k=top_k) - def call(self, input: str, top_k: Optional[int] = None) -> List[RetrieverOutput]: + def call( + self, input: str, top_k: Optional[int] = None, id: str = None + ) -> List[RetrieverOutput]: k = top_k or self.top_k + if not input: + raise ValueError(f"Input cannot be empty, top_k: {k}") + output = self.dspy_retriever(query_or_queries=input, k=k) # print(f"dsy_retriever output: {output}") final_output: List[RetrieverOutput] = [] @@ -152,7 +157,7 @@ def __init__(self, passages_per_hop=3, model_client=None, model_kwargs=None): data=task_desc_str, role_desc="Task description for the language model", param_type=adal.ParameterType.PROMPT, - requires_opt=False, + requires_opt=True, ), "few_shot_demos": adal.Parameter( data=None, @@ -180,7 +185,7 @@ def call(self, question: str, id: str = None) -> adal.GeneratorOutput: "This component is not supposed to be called in training mode" ) - retriever_out = self.retriever.call(input=question) + retriever_out = self.retriever.call(input=question, id=id) successor_map_fn = lambda x: ( # noqa E731 "\n\n".join(x[0].documents) if x and x[0] and x[0].documents else "" @@ -201,11 +206,21 @@ def call(self, question: str, id: str = None) -> adal.GeneratorOutput: # print(f"retriever_out: {retriever_out}") return output + # def call(self, *, question: str, id: str = None) -> adal.GeneratorOutput: + # self.train() + # out = self.forward(question=question, id=id) + # if not isinstance(out, adal.Parameter): + # raise ValueError( + # "This output should be a Parameter, please check the forward function" + # ) + # self.eval() + # return out.data + # TODO: add id in the retriever output def forward(self, question: str, id: str = None) -> adal.Parameter: if not self.training: raise ValueError("This component is not supposed to be called in eval mode") - retriever_out = self.retriever.forward(input=question) + retriever_out = self.retriever.forward(input=question, id=id) successor_map_fn = lambda x: ( # noqa E731 "\n\n".join(x.data[0].documents) if x.data and x.data[0] and x.data[0].documents @@ -281,9 +296,9 @@ def test_vailla_rag(): generator_out.draw_graph() - task.eval() - generator_out = task.call(question=question, id="1") - print(f"generator_out: {generator_out}") + # task.eval() + # generator_out = task.call(question=question, id="1") + # print(f"generator_out: {generator_out}") if __name__ == "__main__": diff --git a/benchmarks/hotpot_qa/adal_exp/train_multi_hop_rag.py b/benchmarks/hotpot_qa/adal_exp/train_multi_hop_rag.py new file mode 100644 index 00000000..d80e6336 --- 
/dev/null +++ b/benchmarks/hotpot_qa/adal_exp/train_multi_hop_rag.py @@ -0,0 +1,183 @@ +from typing import Any, Callable, Dict, Tuple + +import adalflow as adal +from adalflow.eval.answer_match_acc import AnswerMatchAcc +from adalflow.datasets.types import HotPotQAData + +from benchmarks.hotpot_qa._adal_train import load_datasets +from benchmarks.hotpot_qa.adal_exp.build_multi_hop_rag import MultiHopRAG +from use_cases.config import gpt_3_model, gpt_4o_model + + +# TODO: look more into the loss function +# TODO: test LLM judge too. +class MultiHopRAGAdal(adal.AdalComponent): + def __init__( + self, + model_client: adal.ModelClient, + model_kwargs: Dict, + backward_engine_model_config: Dict | None = None, + teacher_model_config: Dict | None = None, + text_optimizer_model_config: Dict | None = None, + ): + task = MultiHopRAG( + model_client=model_client, + model_kwargs=model_kwargs, + passages_per_hop=3, + max_hops=2, + ) + eval_fn = AnswerMatchAcc(type="fuzzy_match").compute_single_item + loss_fn = adal.EvalFnToTextLoss( + eval_fn=eval_fn, eval_fn_desc="fuzzy_match: 1 if str(y) in str(y_gt) else 0" + ) + super().__init__( + task=task, + eval_fn=eval_fn, + loss_fn=loss_fn, + backward_engine_model_config=backward_engine_model_config, + teacher_model_config=teacher_model_config, + text_optimizer_model_config=text_optimizer_model_config, + ) + + # tell the trainer how to call the task + def prepare_task(self, sample: HotPotQAData) -> Tuple[Callable[..., Any], Dict]: + if self.task.training: + return self.task.forward, {"question": sample.question, "id": sample.id} + else: + return self.task.call, {"question": sample.question, "id": sample.id} + + # TODO: use two map fn to make the cde even simpler + + # eval mode: get the generator output, directly engage with the eval_fn + def prepare_eval(self, sample: HotPotQAData, y_pred: adal.GeneratorOutput) -> float: + y_label = "" + if y_pred and y_pred.data and y_pred.data.answer: + y_label = y_pred.data.answer + return self.eval_fn, {"y": y_label, "y_gt": sample.answer} + + # train mode: get the loss and get the data from the full_response + def prepare_loss(self, sample: HotPotQAData, pred: adal.Parameter): + # prepare gt parameter + y_gt = adal.Parameter( + name="y_gt", + data=sample.answer, + eval_input=sample.answer, + requires_opt=False, + ) + + # pred's full_response is the output of the task pipeline which is GeneratorOutput + pred.eval_input = ( + pred.full_response.data.answer + if pred.full_response + and pred.full_response.data + and pred.full_response.data.answer + else "" + ) + return self.loss_fn, {"kwargs": {"y": pred, "y_gt": y_gt}} + + +# Note: diagnose is quite helpful, it helps you to quickly check if the evalfunction is the right metrics +# i checked the eval which does fuzzy match, and found some yes and Yes are not matched, then converted both strings to lower and +# the performances have gone up from 0.15 to 0.4 +def train_diagnose( + model_client: adal.ModelClient, + model_kwargs: Dict, +) -> Dict: + + trainset, valset, testset = load_datasets() + + adal_component = MultiHopRAGAdal( + model_client, + model_kwargs, + backward_engine_model_config=gpt_4o_model, + teacher_model_config=gpt_3_model, + text_optimizer_model_config=gpt_3_model, + ) + trainer = adal.Trainer(adaltask=adal_component) + trainer.diagnose(dataset=trainset, split="train") + # trainer.diagnose(dataset=valset, split="val") + # trainer.diagnose(dataset=testset, split="test") + + +def train( + train_batch_size=4, # larger batch size is not that effective, probably 
because of the LLM's lost-in-the-middle effect + raw_shots: int = 0, + bootstrap_shots: int = 4, + max_steps=1, + num_workers=4, + strategy="constrained", + optimization_order="sequential", + debug=False, + resume_from_ckpt=None, + exclude_input_fields_from_bootstrap_demos=True, +): + adal_component = MultiHopRAGAdal( + **gpt_3_model, + teacher_model_config=gpt_3_model, + text_optimizer_model_config=gpt_4o_model, # gpt-3.5 is not strong enough to be a good optimizer; it struggles with long context + backward_engine_model_config=gpt_4o_model, + ) + print(adal_component) + trainer = adal.Trainer( + train_batch_size=train_batch_size, + adaltask=adal_component, + strategy=strategy, + max_steps=max_steps, + num_workers=num_workers, + raw_shots=raw_shots, + bootstrap_shots=bootstrap_shots, + debug=debug, + weighted_sampling=True, + optimization_order=optimization_order, + exclude_input_fields_from_bootstrap_demos=exclude_input_fields_from_bootstrap_demos, + sequential_order=["text", "demo"], + ) + print(trainer) + + train_dataset, val_dataset, test_dataset = load_datasets() + trainer.fit( + train_dataset=train_dataset, + val_dataset=val_dataset, + test_dataset=test_dataset, + resume_from_ckpt=resume_from_ckpt, + ) + + +if __name__ == "__main__": + from use_cases.config import gpt_3_model + + log = adal.get_logger(level="DEBUG", enable_console=False) + + adal.setup_env() + + # task = MultiHopRAGAdal(**gpt_3_model) + # print(task) + + # train_diagnose(**gpt_3_model) + + # train: 0.15 before the evaluator lower-cased both strings and 0.4 after the conversion + train( + debug=False, + max_steps=12, + # resume_from_ckpt="/Users/liyin/.adalflow/ckpt/ValinaRAGAdal/random_max_steps_12_7c091_run_1.json", + ) + + # notes for debug: if you hit a NoneType error, delete all model caches and try again + # (e.g. ValueError: score must be provided for each demo) + + # 12/11/2024 + # demo only: /Users/liyin/Documents/test/LightRAG/.adalflow/ckpt/MultiHopRAGAdal/constrained_max_steps_12_8cdfc_run_9.json + + # why text grad did not improve in the rag case? Do we need to improve the meta prompt? + # /Users/liyin/.adalflow/ckpt/MultiHopRAGAdal/constrained_max_steps_12_2686e_run_1.json + # 0.58 -> 0.68 on the test split + # 0.72 text grad /Users/liyin/.adalflow/ckpt/MultiHopRAGAdal/constrained_max_steps_12_c1660_run_1.json + # try cycle next + # 0.66 /Users/liyin/.adalflow/ckpt/MultiHopRAGAdal/constrained_max_steps_12_1d189_run_1.json + # no gradients 1021s (/Users/liyin/.adalflow/ckpt/MultiHopRAGAdal/constrained_max_steps_12_68e7e_run_1.json) -> 0.64 -> 0.68, pass 10/10+28 + # no gradient but scores (positive & negative) /Users/liyin/.adalflow/ckpt/MultiHopRAGAdal/constrained_max_steps_12_83871_run_1.json 0.64->0.66, test 0.64 -> 0.66 + # no gradient but only negative score + # no gradient but score + teacher demonstration.
+ feedback while seeing the gt + y + # only negative feedback /Users/liyin/.adalflow/ckpt/MultiHopRAGAdal/constrained_max_steps_12_f5506_run_1.json 0.62 -> 0.7 + # /Users/liyin/.adalflow/ckpt/MultiHopRAGAdal/constrained_max_steps_12_b4aa5_run_1.json 0.74 pass rate 8 32 diff --git a/benchmarks/hotpot_qa/adal_exp/train_vanilla.py b/benchmarks/hotpot_qa/adal_exp/train_vanilla.py index b6cfe9e6..fc14e161 100644 --- a/benchmarks/hotpot_qa/adal_exp/train_vanilla.py +++ b/benchmarks/hotpot_qa/adal_exp/train_vanilla.py @@ -4,7 +4,7 @@ from adalflow.eval.answer_match_acc import AnswerMatchAcc from adalflow.datasets.types import HotPotQAData -from benchmarks.hotpot_qa.adal_train import load_datasets +from benchmarks.hotpot_qa._adal_train import load_datasets from benchmarks.hotpot_qa.adal_exp.build_vanilla_rag import VanillaRAG from use_cases.config import gpt_3_model, gpt_4o_model diff --git a/docs/source/tutorials/auto_text_grad.rst b/docs/source/tutorials/auto_text_grad.rst index ea4294f7..438da80d 100644 --- a/docs/source/tutorials/auto_text_grad.rst +++ b/docs/source/tutorials/auto_text_grad.rst @@ -139,6 +139,13 @@ We currently have the following operators: - forward will be able to track the predecessors to form a DAG of parameters, this will always be helpful. - # a forward will +10/27 + +**Score for weighted sampling in few-shot demo** + +Backpropagation is also used in the few-shot demo, especially for passing the score backward to predecessors and accumulating it in the demo parameter. + + Generator Adaptation ~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/tutorials/generator.rst b/docs/source/tutorials/generator.rst index a170a8de..214886cb 100644 --- a/docs/source/tutorials/generator.rst +++ b/docs/source/tutorials/generator.rst @@ -119,10 +119,10 @@ The minimum setup to initiate a generator in the code: .. code-block:: python - from adalflow.core import Generator + import adalflow as adal from adalflow.components.model_client import GroqAPIClient - generator = Generator( + generator = adal.Generator( model_client=GroqAPIClient(), model_kwargs={"model": "llama3-8b-8192"}, ) diff --git a/docs/source/tutorials/trainer.rst b/docs/source/tutorials/trainer.rst index a8b1d750..fd70778c 100644 --- a/docs/source/tutorials/trainer.rst +++ b/docs/source/tutorials/trainer.rst @@ -3,3 +3,62 @@ Trainer ================ Coming soon! + +Diagnose mode + +A pipeline can consist of multiple generators or retrievers. Each one needs its own call log and diagnose report. + + +Computation graph +------------------- +We design two types of graphs: + +1. A simple node graph with consistent naming of each generator (component_name, or an automated name from the recursive tracing; the naming needs to become consistent eventually). Call it the thumbnail, or find a better name. +2. A detailed graph for debugging and building the pipeline. + +EvalFunction + Score(s) +------------------------ +Currently we can assume we only support one eval_score, but eventually we need to support two scores, such as in the case of the multi-hop RAG. +The last LLM call will have one score, and the previous two generators can potentially have two scores. One is from the last score, and the second will be from the output of the multi-hop retriever. + +So, we need to assign a unique and global component id/name. [Score, component_id, component_name] + +Observability +------------------------ +Building blocks include `GeneratorCallLogger`, `RetrieverCallLogger`, and `LossCallLogger`, where each traces only a single component.
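Each traced component ends up with its own `*_call.jsonl` file. For a quick look at what was captured, the log files can be read with the standard library alone. Below is a minimal sketch, assuming one JSON record per line; the path is hypothetical and the real locations come from the `logger_metadata.json` shown further down:

.. code-block:: python

    import json
    from pathlib import Path

    # Hypothetical log location; see logger_metadata.json for the real per-component paths.
    log_file = Path.home() / ".adalflow/ckpt/MultiHopRAGAdal/diagnose_train/llm_call.jsonl"

    records = []
    with log_file.open() as f:
        for line_no, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue
            try:
                records.append(json.loads(line))
            except json.JSONDecodeError as err:
                # A malformed line here is also what trips up Trainer.diagnose later.
                print(f"skipping malformed line {line_no}: {err}")

    print(f"loaded {len(records)} call records")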
+ +In `AdalComponent`, `configure_callbacks` needs both `_auto_generator_callbacks` and `_auto_retriever_callbacks` to be able to trace the calls of each component. + +.. code-block:: python + + for name, generator in all_generators: + call_logger = GeneratorCallLogger(save_dir=save_dir) + call_logger.reset() + call_logger.register_generator(name) + logger_call = partial(call_logger.log_call, name) + generator.register_callback( + "on_complete", partial(_on_completion_callback, logger_call=logger_call) + ) + file_path = call_logger.get_log_location(name) + file_paths.append(file_path) + log.debug(f"Registered callback for {name}, file path: {file_path}") + + +So when tracing, the `logger_metadata.json` will look like this: + +.. code-block:: json + + { + "retriever.query_generators.0": "/Users/liyin/.adalflow/ckpt/MultiHopRAGAdal/diagnose_train/retriever.query_generators.0_call.jsonl", + "retriever.query_generators.1": "/Users/liyin/.adalflow/ckpt/MultiHopRAGAdal/diagnose_train/retriever.query_generators.1_call.jsonl", + "llm": "/Users/liyin/.adalflow/ckpt/MultiHopRAGAdal/diagnose_train/llm_call.jsonl" + } + +TODO: +- [ ] support multiple eval scores. +- [ ] logger metadata + + { + "llm": "/Users/liyin/.adalflow/ckpt/MultiHopRAGAdal/diagnose_train/llm_call.jsonl" +} +- [ ] retriever log: call_logger = GeneratorCallLogger(save_dir=save_dir) diff --git a/docs/source/use_cases/multi_hop_rag_opt.rst b/docs/source/use_cases/multi_hop_rag_opt.rst new file mode 100644 index 00000000..b5752d28 --- /dev/null +++ b/docs/source/use_cases/multi_hop_rag_opt.rst @@ -0,0 +1,86 @@ +Multi-hop RAG Optimization +============================ + + +question: How many storeys are in the castle that David Gregory inherited? + +query 0: Number of storeys in the castle inherited by David Gregory + +Add context from retriever -> query generator + + +query 1: Kinnairdy Castle storeys OR floors OR levels + +So eventually we get a multi-hop RAG whose multi-hop retriever uses a Generator to transform the query into multiple queries (similar to the ReAct agent design). +By learning the castle name from the first query and retrieval (it is not visible in the question itself), the second pass can retrieve the right context. +Of course, we can even let the LLM workflow decide to stop retrieving once it has obtained enough information. + + +When multi-hop is not enabled, the vanilla RAG fails to give the answer. +When it is enabled, the answer is correct. + +reasoning: + +David Gregory inherited Kinnairdy Castle, which is a tower house having five storeys and a garret, located two miles south of Aberchirder, Aberdeenshire, Scotland. + +answer: Kinnairdy Castle has five storeys. + +Other logs: + +----------------- + +1. Fix the dspy code at `.venv/lib/python3.12/site-packages/dsp/modules/colbertv2.py` + +.. code-block:: python + + from tenacity import retry, stop_after_attempt, wait_exponential + + + @CacheMemory.cache + @retry( + stop=stop_after_attempt(5), + wait=wait_exponential(multiplier=1, min=2, max=10), + reraise=True, + ) + def colbertv2_get_request_v2(url: str, query: str, k: int): + assert k <= 100, "Only k <= 100 is supported for the hosted ColBERTv2 server." + + payload = {"query": query, "k": k} + + try: + res = requests.get(url, params=payload, timeout=10) + res.raise_for_status() + response_json = res.json() + + # Check for an error in the response.
+ if response_json.get("error"): + raise ConnectionError(f"Error from server: {response_json['message']}") + + # If we get a valid 'topk' response, return immediately. + if "topk" in response_json: + topk = response_json["topk"][:k] + return [{**d, "long_text": d["text"]} for d in topk] + + except requests.exceptions.Timeout: + raise TimeoutError("The request timed out. Please try again.") + except requests.exceptions.RequestException as e: + raise ConnectionError(f"Request failed: {e}") + + raise KeyError("'topk' key not found in the response.") + +2. If you hit an error in diagnose similar to: + + .. code-block:: python + + Error loading jsonl file /Users/liyin/.adalflow/ckpt/MultiHopRAGAdal/diagnose_train/llm_call.jsonl: line contains invalid json: unexpected content after document: line 1 column 8568 (char 8567) (line 62) + Traceback (most recent call last): + File "/Users/liyin/Documents/test/LightRAG/benchmarks/hotpot_qa/adal_exp/train_multi_hop_rag.py", line 153, in <module> + train_diagnose(**gpt_3_model) + File "/Users/liyin/Documents/test/LightRAG/benchmarks/hotpot_qa/adal_exp/train_multi_hop_rag.py", line 97, in train_diagnose + trainer.diagnose(dataset=trainset, split="train") + File "/Users/liyin/Documents/test/LightRAG/adalflow/adalflow/optim/trainer/trainer.py", line 228, in diagnose + sorted_logs = [logs_dict[id] for id in sorted_ids] + ~~~~~~~~~^^^^ + KeyError: '5a8b57f25542995d1e6f1371' + +open the `llm_call.jsonl` file, clear all of its contents, and then rerun the training script. diff --git a/poetry.lock b/poetry.lock index edc2b949..6116b161 100644 --- a/poetry.lock +++ b/poetry.lock @@ -44,7 +44,7 @@ testing = ["bitsandbytes", "datasets", "diffusers", "evaluate", "parameterized", [[package]] name = "adalflow" -version = "0.2.5" +version = "0.2.6" description = "The Library to Build and Auto-optimize LLM Applications" optional = false python-versions = ">=3.9, <4.0" @@ -67,6 +67,8 @@ tqdm = "^4.66.4" [package.extras] anthropic = ["anthropic (>=0.31.1,<0.32.0)"] +azure = ["azure-core (>=1.24.0,<2.0.0)", "azure-identity (>=1.12.0,<2.0.0)"] +bedrock = ["boto3 (>=1.35.19,<2.0.0)"] cohere = ["cohere (>=5.5.8,<6.0.0)"] datasets = [] faiss-cpu = ["faiss-cpu (>=1.8.0,<2.0.0)"] @@ -503,10 +505,12 @@ files = [ [package.dependencies] click = ">=8.0.0" +ipython = {version = ">=7.8.0", optional = true, markers = "extra == \"jupyter\""} mypy-extensions = ">=0.4.3" packaging = ">=22.0" pathspec = ">=0.9.0" platformdirs = ">=2" +tokenize-rt = {version = ">=3.2.0", optional = true, markers = "extra == \"jupyter\""} [package.extras] colorama = ["colorama (>=0.4.3)"] @@ -2250,22 +2254,6 @@ files = [ [package.dependencies] jsonpointer = ">=1.9" -[[package]] -name = "jsonpickle" -version = "3.2.2" -description = "Python library for serializing arbitrary object graphs into JSON" -optional = false -python-versions = ">=3.7" -files = [ - {file = "jsonpickle-3.2.2-py3-none-any.whl", hash = "sha256:87cd82d237fd72c5a34970e7222dddc0accc13fddf49af84111887ed9a9445aa"}, - {file = "jsonpickle-3.2.2.tar.gz", hash = "sha256:d425fd2b8afe9f5d7d57205153403fbf897782204437882a477e8eed60930f8c"}, -] - -[package.extras] -docs = ["furo", "rst.linker (>=1.9)", "sphinx"] -packaging = ["build", "twine"] -testing = ["bson", "ecdsa", "feedparser", "gmpy2", "numpy", "pandas", "pymongo", "pytest (>=3.5,!=3.7.3)", "pytest-benchmark", "pytest-benchmark[histogram]", "pytest-checkdocs (>=1.2.3)", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-ruff (>=0.2.1)", "scikit-learn", "scipy", "scipy (>=1.9.3)", "simplejson",
"sqlalchemy", "ujson"] - [[package]] name = "jsonpointer" version = "3.0.0" @@ -4456,22 +4444,6 @@ files = [ {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, ] -[[package]] -name = "pyvis" -version = "0.3.2" -description = "A Python network graph visualization library" -optional = false -python-versions = ">3.6" -files = [ - {file = "pyvis-0.3.2-py3-none-any.whl", hash = "sha256:5720c4ca8161dc5d9ab352015723abb7a8bb8fb443edeb07f7a322db34a97555"}, -] - -[package.dependencies] -ipython = ">=5.3.0" -jinja2 = ">=2.9.6" -jsonpickle = ">=1.4.1" -networkx = ">=1.11" - [[package]] name = "pywin32" version = "306" @@ -5600,6 +5572,17 @@ webencodings = ">=0.4" doc = ["sphinx", "sphinx_rtd_theme"] test = ["pytest", "ruff"] +[[package]] +name = "tokenize-rt" +version = "6.1.0" +description = "A wrapper around the stdlib `tokenize` which roundtrips." +optional = false +python-versions = ">=3.9" +files = [ + {file = "tokenize_rt-6.1.0-py2.py3-none-any.whl", hash = "sha256:d706141cdec4aa5f358945abe36b911b8cbdc844545da99e811250c0cee9b6fc"}, + {file = "tokenize_rt-6.1.0.tar.gz", hash = "sha256:e8ee836616c0877ab7c7b54776d2fefcc3bde714449a206762425ae114b53c86"}, +] + [[package]] name = "tokenizers" version = "0.19.1" @@ -6462,4 +6445,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = ">=3.11, <4.0" -content-hash = "df5b3eaad85fc2f943506d095b2e3f7094982d55d461f40a7be13d9bb742fc6f" +content-hash = "1fdfe039ade0d41d28cab1d52f8f9fddcb23599178d433e31702386b2b1c5b2e" diff --git a/pyproject.toml b/pyproject.toml index c064d819..e174e616 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ packages = [ python = ">=3.11, <4.0" adalflow = { path = "adalflow", develop = true } openai = "^1.34.0" +black = { extras = ["jupyter"], version = "^24.10.0" } [tool.poetry.group.dev.dependencies] @@ -33,7 +34,6 @@ pgvector = "^0.2.5" cohere = "^5.5.8" pydot = "^2.0.0" matplotlib = "^3.9.0" -pyvis = "^0.3.2" ollama = "^0.2.1" torch = ">=2.0, <3.0" textgrad = "^0.1.4" diff --git a/tutorials/rag/config.py b/tutorials/rag/config.py new file mode 100644 index 00000000..2a6d383f --- /dev/null +++ b/tutorials/rag/config.py @@ -0,0 +1,23 @@ +configs = { + "embedder": { + "batch_size": 100, + "model_kwargs": { + "model": "text-embedding-3-small", + "dimensions": 256, + "encoding_format": "float", + }, + }, + "retriever": { + "top_k": 2, + }, + "generator": { + "model": "gpt-3.5-turbo", + "temperature": 0.3, + "stream": False, + }, + "text_splitter": { + "split_by": "word", + "chunk_size": 400, + "chunk_overlap": 200, + }, +} diff --git a/tutorials/rag/rag.py b/tutorials/rag/rag.py new file mode 100644 index 00000000..ab248879 --- /dev/null +++ b/tutorials/rag/rag.py @@ -0,0 +1,105 @@ +from typing import Optional, Any, List + +import adalflow as adal +from adalflow.core.db import LocalDB + +from adalflow.core.types import ModelClientType + +from adalflow.core.string_parser import JsonParser +from adalflow.components.retriever.faiss_retriever import FAISSRetriever +from adalflow.components.data_process import ( + RetrieverOutputToContextStr, + ToEmbeddings, + TextSplitter, +) + +from adalflow.components.model_client import OpenAIClient + +from tutorials.rag.config import configs + + +def prepare_data_pipeline(): + splitter = TextSplitter(**configs["text_splitter"]) + embedder = adal.Embedder( + model_client=ModelClientType.OPENAI(), + model_kwargs=configs["embedder"]["model_kwargs"], + ) + embedder_transformer = ToEmbeddings( + 
embedder=embedder, batch_size=configs["embedder"]["batch_size"] + ) + data_transformer = adal.Sequential( + splitter, embedder_transformer + ) # Sequential chains the splitter and the embedder transformer + return data_transformer + + +rag_prompt_task_desc = r""" +You are a helpful assistant. + +Your task is to answer the query, which may or may not come with context information. +When context is provided, rely on the context rather than your prior knowledge to answer the query. + +Output JSON format: +{ + "answer": "The answer to the query", +}""" + + +class RAG(adal.Component): + + def __init__(self, index_path: str = "index.faiss"): + super().__init__() + + self.db = LocalDB.load_state(index_path) + + self.transformed_docs: List[adal.Document] = self.db.get_transformed_data( + "data_transformer" + ) + embedder = adal.Embedder( + model_client=ModelClientType.OPENAI(), + model_kwargs=configs["embedder"]["model_kwargs"], + ) + # map each document to its embedding vector for retrieval + self.retriever = FAISSRetriever( + **configs["retriever"], + embedder=embedder, + documents=self.transformed_docs, + document_map_func=lambda doc: doc.vector, + ) + self.retriever_output_processors = RetrieverOutputToContextStr(deduplicate=True) + + self.generator = adal.Generator( + prompt_kwargs={ + "task_desc_str": rag_prompt_task_desc, + }, + model_client=OpenAIClient(), + model_kwargs=configs["generator"], + output_processors=JsonParser(), + ) + + def generate(self, query: str, context: Optional[str] = None) -> Any: + if not self.generator: + raise ValueError("Generator is not set") + + prompt_kwargs = { + "context_str": context, + "input_str": query, + } + response = self.generator(prompt_kwargs=prompt_kwargs) + return response + + def call(self, query: str) -> Any: + retrieved_documents = self.retriever(query) + # fill in the retrieved documents by their indices + for i, retriever_output in enumerate(retrieved_documents): + retrieved_documents[i].documents = [ + self.transformed_docs[doc_index] + for doc_index in retriever_output.doc_indices + ] + + print(f"retrieved_documents: \n {retrieved_documents}\n") + context_str = self.retriever_output_processors(retrieved_documents) + + print(f"context_str: \n {context_str}\n") + + return self.generate(query, context=context_str), retrieved_documents diff --git a/use_cases/config.py b/use_cases/config.py index 6eb5b3ed..895ed097 100644 --- a/use_cases/config.py +++ b/use_cases/config.py @@ -41,7 +41,7 @@ gpt_4o_model = { "model_client": OpenAIClient(), "model_kwargs": { - "model": "gpt-4o", + "model": "gpt-4o-mini", "temperature": 1, "top_p": 0.99, "max_tokens": 1000, diff --git a/use_cases/question_answering/bbh/object_count/task.py b/use_cases/question_answering/bbh/object_count/task.py index 5aebb47b..6f5571f8 100644 --- a/use_cases/question_answering/bbh/object_count/task.py +++ b/use_cases/question_answering/bbh/object_count/task.py @@ -40,7 +40,7 @@ def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict): few_shot_demos = adal.Parameter( data=None, role_desc="To provide few shot demos to the language model", - requires_opt=True, + requires_opt=False, param_type=ParameterType.DEMOS, ) @@ -60,6 +60,14 @@ def call( self, question: str, id: str = None ) -> Union[adal.GeneratorOutput, adal.Parameter]: output = self.llm_counter(prompt_kwargs={"input_str": question}, id=id) + print(f"output: {output}, training: {self.training}") + if self.training: + if output.full_response.error and "429" in output.full_response.error: + raise ValueError("Rate limit exceeded") + else: + if output.error and
"429" in output.error: + print("rate limit exceeded:") + raise ValueError("Rate limit exceeded") return output diff --git a/use_cases/question_answering/bbh/object_count/train_new.py b/use_cases/question_answering/bbh/object_count/train_new.py index 280f7c1a..48309aa7 100644 --- a/use_cases/question_answering/bbh/object_count/train_new.py +++ b/use_cases/question_answering/bbh/object_count/train_new.py @@ -130,22 +130,36 @@ def train( print(trainer) train_dataset, val_dataset, test_dataset = load_datasets() - trainer.fit( + ckpt, _ = trainer.fit( train_dataset=train_dataset, val_dataset=val_dataset, test_dataset=test_dataset, resume_from_ckpt=resume_from_ckpt, ) + return ckpt if __name__ == "__main__": + import sys + import json - train( - debug=True, + ckpt = train( + debug=False, max_steps=12, strategy="constrained", exclude_input_fields_from_bootstrap_demos=True, ) + print(f"ckpt: {ckpt}") + # Save ckpt to a file passed as an argument + if len(sys.argv) > 1: # Check if a file path is provided + with open(sys.argv[1], "w") as f: + json.dump({"ckpt": ckpt}, f) # train_diagnose(**gpt_3_model) - # train_diagnose_teacher(**gpt_4o_model) + # train_diagnose_teacher(**gpt_4o_model) # 4omini works well as an optimizer too + # /Users/liyin/.adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_49c63_run_1.json + # 0.72 -> 0.9 val + # 0.79 -> 0.92 test + # 0.86->0.94 val, 0.79 -> 0.93 with only negative gradients /Users/liyin/.adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_7a649_run_1.json + + # without gradients -> 0.9 on tests diff --git a/use_cases/question_answering/bbh/word_sorting/diagnose.py b/use_cases/question_answering/bbh/word_sorting/diagnose.py index 2faa9e76..af2243ef 100644 --- a/use_cases/question_answering/bbh/word_sorting/diagnose.py +++ b/use_cases/question_answering/bbh/word_sorting/diagnose.py @@ -38,34 +38,34 @@ def __init__( # eval_fn = lambda question, gt_answer, pred_answer: 1 super().__init__(task=task, eval_fn=eval_fn) - def handle_one_task_sample(self, sample: Example): + def prepare_task(self, sample: Example): return self.task.call, {"question": sample.question, "id": sample.id} - def evaluate_one_sample( - self, sample: Example, y_pred: adal.GeneratorOutput - ) -> float: + def prepare_eval(self, sample: Example, y_pred: adal.GeneratorOutput) -> float: y_label = "" if ( y_pred is not None and y_pred.data is not None ): # if y_pred and y_pred.data: might introduce bug when the data is 0 y_label = y_pred.data - return self.eval_fn( - question=sample.question, gt_answer=sample.answer, pred_answer=y_label - ) + return self.eval_fn, { + "question": sample.question, + "gt_answer": sample.answer, + "pred_answer": y_label, + } def evaluate_one_sample(): - trainset, valset, testset = load_datasets(task_name="BBH_word_sorting") + trainset, valset, testset = load_datasets(task_name="word_sorting") adal_component = WordSortingAdalComponent( **gpt_3_model, llm_judge_model_config=gpt_3_model ) example = trainset[1] - call, kwargs = adal_component.handle_one_task_sample(example) + call, kwargs = adal_component.prepare_task(example) output = call(**kwargs) print(f"output: {output}") print(f"trainset[0]: {example}") - score = adal_component.evaluate_one_sample(example, output) + score = adal_component.prepare_eval(example, output) print(score) @@ -74,7 +74,7 @@ def diagnose( model_kwargs: Dict, ) -> Dict: - trainset, valset, testset = load_datasets(task_name="BBH_word_sorting") + trainset, valset, testset = load_datasets(task_name="word_sorting") 
adal_component = WordSortingAdalComponent( model_client, model_kwargs, llm_judge_model_config=gpt_3_model ) @@ -91,3 +91,4 @@ def diagnose( # evaluate_one_sample() diagnose(**gpt_3_model) + # 0.88 train, 0.84 test, 0.72 val diff --git a/use_cases/question_answering/bbh/word_sorting/task.py b/use_cases/question_answering/bbh/word_sorting/task.py index ace7d1e6..0d1d487e 100644 --- a/use_cases/question_answering/bbh/word_sorting/task.py +++ b/use_cases/question_answering/bbh/word_sorting/task.py @@ -69,9 +69,7 @@ def test_word_sorting_task(): task_pipeline = QuestionAnswerTaskPipeline(**gpt_3_model) print(task_pipeline) - train_dataset, val_dataset, test_dataset = load_datasets( - task_name="BBH_word_sorting" - ) + train_dataset, val_dataset, test_dataset = load_datasets(task_name="word_sorting") example = train_dataset[0] question = example.question diff --git a/use_cases/question_answering/bbh/word_sorting/train.py b/use_cases/question_answering/bbh/word_sorting/train.py index 4d1af9e3..12518206 100644 --- a/use_cases/question_answering/bbh/word_sorting/train.py +++ b/use_cases/question_answering/bbh/word_sorting/train.py @@ -52,23 +52,23 @@ def __init__( text_optimizer_model_config=text_optimizer_model_config, ) - def handle_one_task_sample(self, sample: Example): + def prepare_task(self, sample: Example): return self.task.call, {"question": sample.question, "id": sample.id} - def evaluate_one_sample( - self, sample: Example, y_pred: adal.GeneratorOutput - ) -> float: + def prepare_eval(self, sample: Example, y_pred: adal.GeneratorOutput) -> float: y_label = "" if ( y_pred is not None and y_pred.data is not None ): # if y_pred and y_pred.data: might introduce bug when the data is 0 y_label = y_pred.data - return self.eval_fn( - question=sample.question, gt_answer=sample.answer, pred_answer=y_label - ) + return self.eval_fn, { + "question": sample.question, + "gt_answer": sample.answer, + "pred_answer": y_label, + } - def handle_one_loss_sample(self, sample: Example, pred: adal.Parameter): + def prepare_loss(self, sample: Example, pred: adal.Parameter): # prepare gt parameter y_gt = adal.Parameter( name="y_gt", @@ -92,19 +92,6 @@ def handle_one_loss_sample(self, sample: Example, pred: adal.Parameter): } } - # def configure_backward_engine(self): - # super().configure_backward_engine_helper(**self.backward_engine_model_config) - - # def configure_teacher_generator(self): - # super().configure_teacher_generator_helper(**self.teacher_model_config) - - # def configure_optimizers( - # self, - # ): # TODO: train the text optimizer and the demo optimizer at the same time - # to = super().configure_text_optimizer_helper(**self.text_optimizer_model_config) - # do = super().configure_demo_optimizer_helper() - # return to + do - def train( train_batch_size=4, # larger batch size is not that effective, probably because of llm's lost in the middle @@ -141,9 +128,7 @@ def train( ) print(trainer) - train_dataset, val_dataset, test_dataset = load_datasets( - task_name="BBH_word_sorting" - ) + train_dataset, val_dataset, test_dataset = load_datasets(task_name="word_sorting") for dataset in [train_dataset, val_dataset, test_dataset]: for example in dataset: example.question = example.question.replace( diff --git a/use_cases/question_answering/bbh/word_sorting/train_paper.py b/use_cases/question_answering/bbh/word_sorting/train_paper.py index 00a84830..2c4b0e15 100644 --- a/use_cases/question_answering/bbh/word_sorting/train_paper.py +++ b/use_cases/question_answering/bbh/word_sorting/train_paper.py @@ -42,7 
+42,6 @@ def __init__( eval_fn=eval_fn, eval_fn_desc="exact_match: 1 if str(y) == str(y_gt) else 0", ) - # eval_fn = lambda question, gt_answer, pred_answer: 1 super().__init__( task=task, eval_fn=eval_fn, @@ -52,23 +51,23 @@ def __init__( text_optimizer_model_config=text_optimizer_model_config, ) - def handle_one_task_sample(self, sample: Example): + def prepare_task(self, sample: Example): return self.task.call, {"question": sample.question, "id": sample.id} - def evaluate_one_sample( - self, sample: Example, y_pred: adal.GeneratorOutput - ) -> float: + def prepare_eval(self, sample: Example, y_pred: adal.GeneratorOutput) -> float: y_label = "" if ( y_pred is not None and y_pred.data is not None ): # if y_pred and y_pred.data: might introduce bug when the data is 0 y_label = y_pred.data - return self.eval_fn( - question=sample.question, gt_answer=sample.answer, pred_answer=y_label - ) + return self.eval_fn, { + "question": sample.question, + "gt_answer": sample.answer, + "pred_answer": y_label, + } - def handle_one_loss_sample(self, sample: Example, pred: adal.Parameter): + def prepare_loss(self, sample: Example, pred: adal.Parameter): # prepare gt parameter y_gt = adal.Parameter( name="y_gt", @@ -92,19 +91,6 @@ def handle_one_loss_sample(self, sample: Example, pred: adal.Parameter): } } - # def configure_backward_engine(self): - # super().configure_backward_engine_helper(**self.backward_engine_model_config) - - # def configure_teacher_generator(self): - # super().configure_teacher_generator_helper(**self.teacher_model_config) - - # def configure_optimizers( - # self, - # ): # TODO: train the text optimizer and the demo optimizer at the same time - # to = super().configure_text_optimizer_helper(**self.text_optimizer_model_config) - # do = super().configure_demo_optimizer_helper() - # return to + do - def train( train_batch_size=4, # larger batch size is not that effective, probably because of llm's lost in the middle @@ -141,9 +127,7 @@ def train( ) print(trainer) - train_dataset, val_dataset, test_dataset = load_datasets( - task_name="BBH_word_sorting" - ) + train_dataset, val_dataset, test_dataset = load_datasets(task_name="word_sorting") trainer.fit( train_dataset=train_dataset, val_dataset=val_dataset, diff --git a/use_cases/text_grad_2.0_train.py b/use_cases/text_grad_2.0_train.py new file mode 100644 index 00000000..37ff320d --- /dev/null +++ b/use_cases/text_grad_2.0_train.py @@ -0,0 +1,58 @@ +import subprocess +import tempfile +import json + +# List of experiments to run +object_count = "use_cases/question_answering/bbh/object_count/train_new.py" +hotpot_qa_multi_hop_rag = "benchmarks/hotpot_qa/adal_exp/train_multi_hop_rag.py" + +ckpt_values = [] +experiments = [ + object_count, + # hotpot_qa_multi_hop_rag, +] + +# Optional: Arguments for each experiment (if needed) +experiment_args = { + object_count: "", + # hotpot_qa_multi_hop_rag: "", +} +ckpt_values = {} + + +def run_experiment(script, args): + try: + # Use a temporary file to store the ckpt + with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as temp_file: + temp_path = temp_file.name + + print(f"Running {script} with args: {args}") + subprocess.run( + f"python {script} {temp_path} {args}", + check=True, + shell=True, + text=True, + ) + + # Read the ckpt value from the temporary file + with open(temp_path, "r") as f: + data = json.load(f) + ckpt = data.get("ckpt") + print(f"Checkpoint from {script}: {ckpt}") + return ckpt + + except subprocess.CalledProcessError as e: + print(f"Experiment {script} failed with 
error: {e}") + return None + + +if __name__ == "__main__": + for experiment in experiments: + args = experiment_args.get(experiment, "") + ckpt = run_experiment(experiment, args) + if ckpt: + ckpt_values[experiment] = ckpt + + print("\nAll Checkpoints:") + for experiment, ckpt in ckpt_values.items(): + print(f"{experiment}: {ckpt}")