diff --git a/.github/ISSUE_TEMPLATE/1_bug_report.yaml b/.github/ISSUE_TEMPLATE/1_bug_report.yaml
index e15a7dc3f..7657b0cd9 100644
--- a/.github/ISSUE_TEMPLATE/1_bug_report.yaml
+++ b/.github/ISSUE_TEMPLATE/1_bug_report.yaml
@@ -1,6 +1,6 @@
name: Report a bug
description: Any errors that you encounter.
-labels: ['needs triage', 'bug']
+labels: ['bug']
body:
- type: markdown
attributes:
@@ -71,10 +71,7 @@ body:
Please provide details about your environment, including the following:
- OS (e.g., Linux, Windows, macOS)
value: |
-
- Current environment
-
-
+ - OS: [e.g., Linux, Windows, macOS]
validations:
required: false
diff --git a/.github/ISSUE_TEMPLATE/2_suggest_improvement.yaml b/.github/ISSUE_TEMPLATE/2_suggest_improvement.yaml
index 68bcf055a..a9c471fde 100644
--- a/.github/ISSUE_TEMPLATE/2_suggest_improvement.yaml
+++ b/.github/ISSUE_TEMPLATE/2_suggest_improvement.yaml
@@ -1,6 +1,6 @@
name: Improvement suggestion
description: Suggest an improvement, a code refactor, or deprecation
-labels: ['needs triage', 'refactor']
+labels: ['[adalflow] improvement']
body:
- type: textarea
attributes:
diff --git a/.github/ISSUE_TEMPLATE/3_feature_request.yaml b/.github/ISSUE_TEMPLATE/3_feature_request.yaml
index 8aa0bc38e..c11f05f27 100644
--- a/.github/ISSUE_TEMPLATE/3_feature_request.yaml
+++ b/.github/ISSUE_TEMPLATE/3_feature_request.yaml
@@ -1,6 +1,6 @@
name: Feature request
description: Propose a feature for this project
-labels: ["needs triage", "feature"]
+labels: ["[adalflow] new feature request"]
body:
- type: textarea
attributes:
diff --git a/.github/ISSUE_TEMPLATE/4_documenting.yaml b/.github/ISSUE_TEMPLATE/4_documenting.yaml
index a7c6e7b77..e5b77b0a5 100644
--- a/.github/ISSUE_TEMPLATE/4_documenting.yaml
+++ b/.github/ISSUE_TEMPLATE/4_documenting.yaml
@@ -1,6 +1,6 @@
name: Typos and doc fixes
description: Tell us about how we can improve our documentation and Google colab/ipynb notebooks.
-labels: ["needs triage", "docs"]
+labels: ["documentation"]
body:
- type: textarea
attributes:
diff --git a/.github/ISSUE_TEMPLATE/5_suggest_integration.yaml b/.github/ISSUE_TEMPLATE/5_suggest_integration.yaml
index f26eed2c2..819dbd6ee 100644
--- a/.github/ISSUE_TEMPLATE/5_suggest_integration.yaml
+++ b/.github/ISSUE_TEMPLATE/5_suggest_integration.yaml
@@ -1,6 +1,6 @@
-name: Feature request
+name: New integration proposal
description: Propose a new integration for this project, either db, retriever, model_client. We highly recommend you to find a POC from the provider team to work together on this.
-labels: ['needs triage', 'feature']
+labels: ['[adalflow] integration']
body:
- type: textarea
attributes:
diff --git a/.github/ISSUE_TEMPLATE/6_suggest_usecases_benchmarks.yaml b/.github/ISSUE_TEMPLATE/6_suggest_usecases_benchmarks.yaml
new file mode 100644
index 000000000..ea93a39ed
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/6_suggest_usecases_benchmarks.yaml
@@ -0,0 +1,32 @@
+name: Suggest use cases and benchmarks
+description: Propose new use cases that AdalFlow should support or benchmarks that we should compare against
+labels: ["new use cases/benchmarks"]
+body:
+ - type: textarea
+ attributes:
+ label: Description & Motivation
+ description: A clear and concise description of the new use case or benchmark proposal
+ placeholder: |
+ Please outline the motivation for the proposal.
+
+
+ - type: textarea
+ attributes:
+ label: Pitch
+ description: A clear and concise description of what you want to happen.
+ validations:
+ required: false
+
+ - type: textarea
+ attributes:
+ label: Alternatives
+ description: A clear and concise description of any alternative solutions or features you've considered, if any.
+ validations:
+ required: false
+
+ - type: textarea
+ attributes:
+ label: Additional context
+ description: Add any other context or screenshots about the feature request here.
+ validations:
+ required: false
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
index ad44518c2..065ad099b 100644
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -1,5 +1,8 @@
blank_issues_enabled: false
contact_links:
+ - name: 👍 Upvote an issue
+ url: https://github.com/SylphAI-Inc/AdalFlow/issues
+ about: You should upvote an issue if it is important to you.
- name: 💬 Chat with us
url: https://discord.gg/ezzszrRZvT
about: Live chat with experts, engineers, and users in our Discord community.
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 000000000..11b273776
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,38 @@
+## What does this PR do?
+
+
+
+Fixes #\
+
+
+
+
+ Before submitting
+
+- Was this **discussed/agreed** via a GitHub issue? (not for typos and docs)
+- [ ] Did you read the [contributor guideline](https://adalflow.sylph.ai/contributor/index.html)?
+- [ ] Did you make sure your **PR does only one thing**, instead of bundling different changes together?
+- Did you make sure to **update the documentation** with your changes? (if necessary)
+- Did you write any **new necessary tests**? (not for typos and docs)
+- [ ] Did you verify new and **existing tests pass** locally with your changes?
+- Did you list all the **breaking changes** introduced by this pull request?
+
+
+
+
+
+
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ecf5b0704..6f85a6c80 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -14,13 +14,24 @@ repos:
hooks:
- id: black
args: ['--line-length=88']
+ exclude: ^docs/|.*\.(json|yaml|md|txt)$
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.2
hooks:
# Run the linter.
- id: ruff
- args: ['--fix', '--extend-ignore=E402']
+ args: ['--fix']
+ exclude: ^docs/|.*\.(json|yaml|md|txt)$
+
+ # Add local hooks to run custom commands
+ - repo: local
+ hooks:
+ - id: run-make-format
+ name: Run Make Format
+ entry: make format
+ language: system
+ pass_filenames: false
# - repo: https://github.com/pycqa/flake8
# rev: 4.0.1
# hooks:
diff --git a/Makefile b/Makefile
new file mode 100644
index 000000000..3670e02f6
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,51 @@
+# Define variables for common directories and commands
+PYTHON = poetry run
+SRC_DIR = .
+
+# Default target: Show help
+.PHONY: help
+help:
+ @echo "Available targets:"
+ @echo " setup Install dependencies and set up pre-commit hooks"
+ @echo " format Run Black and Ruff to format the code"
+ @echo " lint Run Ruff to check code quality"
+ @echo " test Run tests with pytest"
+ @echo " precommit Run pre-commit hooks on all files"
+ @echo " clean Clean up temporary files and build artifacts"
+
+# Install dependencies and set up pre-commit hooks
+.PHONY: setup
+setup:
+ poetry install
+ poetry run pre-commit install
+
+# Format code using Black and Ruff
+.PHONY: format
+format:
+ $(PYTHON) black $(SRC_DIR)
+ git ls-files | xargs pre-commit run black --files
+
+# Run lint checks using Ruff
+.PHONY: lint
+lint:
+ $(PYTHON) ruff check $(SRC_DIR)
+
+# Run all pre-commit hooks on all files
+.PHONY: precommit
+precommit:
+ $(PYTHON) pre-commit run --all-files
+
+# Run tests
+.PHONY: test
+test:
+ $(PYTHON) pytest
+
+# Clean up temporary files and build artifacts
+.PHONY: clean
+clean:
+ rm -rf .pytest_cache
+ rm -rf .mypy_cache
+ rm -rf __pycache__
+ rm -rf build dist *.egg-info
+ find . -type d -name "__pycache__" -exec rm -r {} +
+ find . -type f -name "*.pyc" -delete
diff --git a/README.md b/README.md
index 04ec9a5ac..61e411fb1 100644
--- a/README.md
+++ b/README.md
@@ -76,8 +76,21 @@ For AI researchers, product teams, and software engineers who want to learn the
+# Quick Start
+Install AdalFlow with pip:
+
+```bash
+pip install adalflow
+```
+
+Please refer to the [full installation guide](https://adalflow.sylph.ai/get_started/installation.html) for more details.
+
+
+* Try the [Building Quickstart](https://colab.research.google.com/drive/1TKw_JHE42Z_AWo8UuRYZCO2iuMgyslTZ?usp=sharing) in Colab to see how AdalFlow can build the task pipeline, including Chatbot, RAG, agent, and structured output.
+* Try the [Optimization Quickstart](https://colab.research.google.com/github/SylphAI-Inc/AdalFlow/blob/main/notebooks/qas/adalflow_object_count_auto_optimization.ipynb) to see how AdalFlow can optimize the task pipeline.
+
# Why AdalFlow
@@ -111,6 +124,8 @@ Here is an optimization demonstration on a text classification task:
Among all libraries, AdalFlow achieved the highest accuracy with manual prompting (starting at 82%) and the highest accuracy after optimization.
+
+
Further reading: [Optimize Classification](https://adalflow.sylph.ai/use_cases/classification.html)
## Light, Modular, and Model-Agnostic Task Pipeline
@@ -127,6 +142,14 @@ You have full control over the prompt template, the model you use, and the outpu
+Many providers and models accessible via the same interface:
+
+
+
+
+
+[All available model providers](https://adalflow.sylph.ai/apis/components/components.model_client.html)
+
@@ -175,23 +177,7 @@ make html
And you will be able to find the newly added use_cases module.
-### Add New Docs
-
-If you want to add any written files such as README.md to the documentation, there is an easy way to transform the files to `.rst` files using `Pandoc`.
-
-- First, install Pandoc with Homebrew:
-
- `brew install pandoc`
-- Then run `pandoc -s -o `. For example, in the root directory run `pandoc -s README.md -o docs/source/get_started/introduction.rst`.This command will take content from `README.md` and create an `introduction.rst` file in the specified directory.
-
-After editing, run
-
-```python
-cd docs
-make clean
-make html
-```
### Commit the Edited Documentation
@@ -199,7 +185,7 @@ Remember to exclude any unnecessary files in `.gitignore`. Please donβt commit
Please push your updates to the GitHub repo.
-The structure of the code base and the docs:
+The structure of the docs directory looks like this:
```
AdalFlow/
@@ -227,12 +213,25 @@ AdalFlow/
β βββ conf.py
β βββ index.rst
β βββ Makefile
-βββ core/
-β βββ __init__.py
-β βββ module1.py
-β βββ module2.py
-βββ components/
-β βββ __init__.py
-β βββ module1.py
-β βββ module2.py
+β βββ pyproject.toml
+β βββ poetry.lock
+```
+
+
+## [Optional] Convert Markdown to reStructuredText
+
+If you want to add any written files such as README.md to the documentation, there is an easy way to transform the files to `.rst` files using `Pandoc`.
+
+- First, install Pandoc with Homebrew:
+
+ `brew install pandoc`
+
+- Then run `pandoc -s <input_file> -o <output_file>`. For example, in the root directory run `pandoc -s README.md -o docs/source/get_started/introduction.rst`. This command will take content from `README.md` and create an `introduction.rst` file in the specified directory.
+
+After editing, run
+
+```bash
+cd docs
+make clean
+make html
```
diff --git a/docs/requirements.txt b/docs/requirements.txt
index e59cca034..14ce30d7b 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -8,4 +8,4 @@ PyYAML
readthedocs-sphinx-search==0.3.2
numpy
tqdm
-tiktoken
\ No newline at end of file
+tiktoken
diff --git a/docs/source/_static/css/custom.css b/docs/source/_static/css/custom.css
index 95445edc9..73ee6a04c 100644
--- a/docs/source/_static/css/custom.css
+++ b/docs/source/_static/css/custom.css
@@ -344,4 +344,4 @@ table tr:hover {
font-size: 8px;
/* Further adjust text size for smallest screens */
}
-}
\ No newline at end of file
+}
diff --git a/docs/source/_static/images/adalflow_code_samples.png b/docs/source/_static/images/adalflow_code_samples.png
new file mode 100644
index 000000000..9a3bf3c2e
Binary files /dev/null and b/docs/source/_static/images/adalflow_code_samples.png differ
diff --git a/docs/source/_static/images/adalflow_files.png b/docs/source/_static/images/adalflow_files.png
new file mode 100644
index 000000000..bcffa12b7
Binary files /dev/null and b/docs/source/_static/images/adalflow_files.png differ
diff --git a/docs/source/_static/images/adalflow_issue_template.png b/docs/source/_static/images/adalflow_issue_template.png
new file mode 100644
index 000000000..9a85ec448
Binary files /dev/null and b/docs/source/_static/images/adalflow_issue_template.png differ
diff --git a/docs/source/_static/images/adalflow_issues.png b/docs/source/_static/images/adalflow_issues.png
new file mode 100644
index 000000000..527c3a0f7
Binary files /dev/null and b/docs/source/_static/images/adalflow_issues.png differ
diff --git a/docs/source/_static/images/multi-providers.png b/docs/source/_static/images/multi-providers.png
new file mode 100644
index 000000000..291b69f33
Binary files /dev/null and b/docs/source/_static/images/multi-providers.png differ
diff --git a/docs/source/_static/images/pr_draft.png b/docs/source/_static/images/pr_draft.png
new file mode 100644
index 000000000..38eca9a5e
Binary files /dev/null and b/docs/source/_static/images/pr_draft.png differ
diff --git a/docs/source/apis/index.rst b/docs/source/apis/index.rst
index 13d19ff93..7c124c748 100644
--- a/docs/source/apis/index.rst
+++ b/docs/source/apis/index.rst
@@ -1,3 +1,5 @@
+.. _apis:
+
API Reference
=============
diff --git a/docs/source/contributor/contribute_to_code.rst b/docs/source/contributor/contribute_to_code.rst
index 185e23bb5..1fe1ca6e2 100644
--- a/docs/source/contributor/contribute_to_code.rst
+++ b/docs/source/contributor/contribute_to_code.rst
@@ -1,11 +1,12 @@
-Coding and Testing
+Development Essentials
======================================
-This document will cover how you can contribute to lightRAG codebase.
+This document will cover how you can set up the AdalFlow codebase and start coding, testing, and documentation.
Set Up
^^^^^^^^^^^^^^^^^^^
-The current ``LightRAG`` code contribution supports `poetry `_ setup only. The team is working on optimizing the library and will get back to support more environment soon.
-If you are only interested in using ``LightRAG`` as a package, please check our `installation guide `_.
+We mainly use `poetry `_ for dependency management and virtual environment setup.
+
+.. If you are only interested in using ``LightRAG`` as a package, please check our `installation guide `_.
To set up ``poetry`` and contribute, please check the following steps:
@@ -13,11 +14,45 @@ To set up ``poetry`` and contribute, please check the following steps:
.. code-block:: bash
- git clone https://github.com/SylphAI-Inc/LightRAG
- cd LightRAG
+ git clone https://github.com/SylphAI-Inc/AdalFlow
+ cd AdalFlow
+
+2. **Set Up the AdalFlow Dev Environment:**
+ The AdalFlow source code, tests, and dependencies are in the ``./adalflow`` directory.
+ The ``./adalflow/pyproject.toml`` controls the dependencies for the ``adalflow`` package.
+ Use Poetry to install the dependencies and set up the virtual environment:
+
+ .. code-block:: bash
+
+ cd adalflow
+ poetry install
+ poetry shell
+
+ Test the setup by running the tests at the ``./adalflow`` directory:
+
+ .. code-block:: bash
+
+ pytest tests
+
+3. **Set Up the Root Dev Environment:**
+ At the root directory, we have a ``pyproject.toml`` file that controls the dependencies for the root directory.
+
+ .. code-block:: bash
+
+ poetry install
+ poetry shell
+
+ This will install all relevant dependencies and the files in /use_cases, /tutorials, and /benchmarks will be using the development version of the ``adalflow`` package.
+ You should see output similar to the following:
+
+ .. code-block:: bash
+
+ - Installing adalflow (0.2.5 /Users/liyin/Documents/test/AdalFlow/adalflow)
+
-2. **Configure API Keys:**
+
+4. **[Optional] Configure API Keys in the Root Directory:**
Copy the example environment file and add your API keys:
.. code-block:: bash
@@ -31,50 +66,46 @@ To set up ``poetry`` and contribute, please check the following steps:
# COHERE_API_KEY=YOUR_API_KEY_IF_YOU_USE_COHERE
# HF_TOKEN=YOUR_API_KEY_IF_YOU_USE_HF
-3. **Install Dependencies:**
-
- The ``./lightrag/pyproject.toml`` controls the dependencies for the ``LightRAG`` package.
- Use Poetry to install the dependencies and set up the virtual environment:
+ This will be helpful for you to run tutorials, use cases, and benchmarks.
- .. code-block:: bash
- cd lightrag
- poetry install
- poetry shell
-Codebase Structure
+Coding
^^^^^^^^^^^^^^^^^^^
-It is recommended to check our `LightRAG codebase structure `_ and current `API references `_ to familiarize yourself with the directories and paths before contributing.
+Structuring
+~~~~~~~~~~~~~~~
+It is recommended to check out the structuring in :ref:`part1-structuring` and :doc:`../apis/index`
+to understand the codebase structure.
-Code Examples
-^^^^^^^^^^^^^^^^^^^
-We want to support you with our best. We have included code samples in the `tutorial `_ for you to refer to.
+What to code
+~~~~~~~~~~~~~~~
+Please check the :ref:`part3-contributing-steps` to see some coding examples and steps to contribute to the codebase.
-We inlcude a list of potential samples(`We are working in progress to add more`):
+Code Tips
+~~~~~~~~~~~~~~~
+* Please follow the `Google Python Style Guide `_.
-- `ModelClient integration `_. This document will help if you want to add new models not included in our codebase.
-- `Retriever Integration `_. We provide different retrivers but you can create more.
+* Functions and classes should include standard docstrings and comments. Please refer to `documentation contribution guidelines <./contribute_to_document.html>`_ for standard docstrings.
-Code Tips
-^^^^^^^^^^^^^^^^^^^
-* When writing code, it is appreciated to include any important docstrings and comments. Please refer to `documentation contribution guidelines <./contribute_to_document.html>`_ for standard docstrings.
-* LightRAG is a Python library and if you could follow the `Google Python Style Guide `_, the codebase will be more consistent.
+Copilot
+~~~~~~~~~~~~~~~
+We suggest you use `GitHub Copilot `_ to help you write code faster and more efficiently.
+You can follow this `Guide `_ to set it up with your IDE.
+There are other options like `Cursor `_ and `Tabnine `_ that you can use as well.
Dependencies
-^^^^^^^^^^^^^^^^^^^
-If you want to add any new dependencies to the package, please include them in your PR description to inform us.
-Since we have already set up the testing automatic workflow in GitHub, please also set your new dependencies in
-``./lightrag/pyproject.toml`` file ``[tool.poetry.group.test.dependencies]`` section to avoid dependency errors in our CI/CD workflow.
-
-In order to correctly add the dependency using ``poetry``, please run
+~~~~~~~~~~~~~~~
+1. If you want to add any new dependencies to the package, please include them in your PR description to inform us.
+2. Since we have already set up the testing automatic workflow in GitHub, please also set your new dependencies in ``./adalflow/pyproject.toml`` file ``[tool.poetry.group.test.dependencies]`` section to avoid dependency errors in our CI/CD workflow.
+ In order to correctly add the dependency using ``poetry``, please run
-.. code-block:: bash
+ .. code-block:: bash
- poetry add --group test
+ poetry add --group test
Testing
^^^^^^^^^^^^^^^^^^^
After you update the code, please make sure your code is well tested before making a pull request.
-There is a ``./lightrag/tests`` folder in the project directory to host your unit testing cases.
+There is a ``./adalflow/tests`` folder in the project directory to host your unit testing cases.
You might need to install the testing packages using ``poetry``:
@@ -82,13 +113,27 @@ For example:
.. code-block:: bash
- poetry add --group test unittest
- poetry add --group test pytest
- poetry add --group test mypy
+ poetry install # or
+ poetry add --group test
+
+
+You should name your test files with the following format: ``test_.py``.
+
+Activate the virtual environment from `./adalflow` and run the tests:
+.. code-block:: bash
+
+ poetry shell
+ pytest
-All the test scripts should start with ``test_``. For example, run the individual test for ``components`` with:
+To run a specific test file, you can use the following command:
.. code-block:: bash
- python lightrag/tests/test_components.py
+ pytest tests/test_components.py
+
+For more details on testing, please refer to the `README.md `_ under the ``./adalflow/tests`` directory.
+
+Documentation
+^^^^^^^^^^^^^^^^^^^
+Please refer to the `README.md `_ under the ``./docs`` directory for more details on how to contribute to the documentation.
diff --git a/docs/source/contributor/contribute_to_document.rst b/docs/source/contributor/contribute_to_document.rst
deleted file mode 100644
index bfe270309..000000000
--- a/docs/source/contributor/contribute_to_document.rst
+++ /dev/null
@@ -1,114 +0,0 @@
-Documenting
-===============================================
-
-.. contents::
- :local:
- :depth: 2
-
-.. _Documentation Contribution:
-
-- **User-Facing Documentation**: Found on the main docs site. These include tutorials, guides, and usage documentation meant for end users.
-- **Developer Documentation**: Located within the repository's READMEs and the ``docs/`` directory. These documents are more technical and intended for contributors and maintainers.
-
-This section is about user-facing documentation.
-
-LightRAG uses `Sphinx `_ for documentation, leveraging both `reStructuredText `_ and Sphinx's `autodoc `_ feature to pull docstrings from code and organize them through ``.rst`` files. Our documentation is split into:
-
-Souce Code Docstring Standard
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Sphinx automatically pulls docstrings from source code and uses them as the docs in API reference. For clarity and consistency, we have a standard for all the code contributors.
-
-Aligning with Pytorch, LightRAG uses the `Google style with Sphinx `_ for formatting docstrings `(detailed styles) `_, emphasizing **docstring** and **type control** to guarantee the document and code quality.
-
-
-Setup & Build Documentation
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-**1. Clone the GitHub Project**
-
-.. code-block:: bash
-
- git clone https://github.com/SylphAI-Inc/LightRAG.git
-
-**2. Install Necessary Packages**
-
-LightRAG's documentation style is `pydata_sphinx_theme `_.
-
-.. Install by ``pip``:
-
-.. .. code-block:: bash
-
-.. cd docs
-.. pip install -r requirements.txt
-
-Install by ``poetry`` along with all other dependencies for LightRAG:
-
-.. code-block:: bash
-
- poetry install
-
-**3. Build the Documentation**
-
-.. code-block:: bash
-
- cd docs
- make html
-
-
-**conf.py**
-
-This file (``docs/source/conf.py``) contains configurations used by Sphinx, including extensions, templates, HTML theme, and language settings.
-
-**Source Code Doc-string**
-
-Follow `Google style docstrings `_ to update your source code docstrings. Limit lines to **80** characters for better readability in various environments.
-
-**RST Files**: Directly edit ``.rst`` files for broader changes or new sections. Use the ``.. toctree::`` directive to link documents.
-
-The ``.rst`` files are in the ``docs/source``. The majority of ``.rst`` files in the ``docs/source/apis`` are generated automatically from the Python code docstrings using ``sphinx-apidoc``.
-
-To shorten the doc generating process, please remove the files that is not included in your project.
-
-The Sphinx build will show warnings but the docs will still be completed.
-
-If you have a module folder containing code, for example, ``components/``, please add the following line to the ``docs/Makefile`` in the ``apidoc:`` section.
-
-.. code-block:: bash
-
- @sphinx-apidoc -o $(APIDOCOUTDIR)/components ../components --separate --force
-
-
-**4. View the Documentation Locally**
-
-After building, open ``docs/build/html/index.html`` in a web browser. If you face issues with local resources, such as the browser prohibits loading the web page correctly, run a local server:
-
-.. code-block:: bash
-
- cd docs/build
- python -m http.server 8000
-
-Then navigate to the corresbonding site in your browser. E.g. it can be `http://127.0.0.1:8000/`.
-
-
-
-Adding Documentation Tests
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-To ensure the documentation remains up-to-date, LightRAG uses Sphinx's Doctest extension. Add ``.. testcode::`` to your ``.rst`` files or docstrings and run ``make doctest`` to test your documentation snippets.
-
-To manually run these tests, run:
-
-.. code-block:: bash
-
- cd docs
- make doctest
-
-
-Documentation Dependencies
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-If your documentation requires any new dependencies, please include it in the ``pyproject.toml`` under the root directory, include it in your PR description and let us know.
-
-Commit Changes
-~~~~~~~~~~~~~~~~~~~~~~~~~
-
-After making changes, commit the ``.rst`` and source files, avoiding the ``docs/build`` directory, and push them to your GitHub fork for review.
diff --git a/docs/source/contributor/contribution.rst b/docs/source/contributor/contribution.rst
index 8656877a0..f1c0394ce 100644
--- a/docs/source/contributor/contribution.rst
+++ b/docs/source/contributor/contribution.rst
@@ -1,83 +1,473 @@
-Overall contributing process
+Contributing Process
=======================================
+Welcome to the AdalFlow community!
-Welcome to the AdalFlow community! We're building the most user-friendly, modular library for building and auto-optimizing LLM applications, from Chatbots, RAGs, to Agents.
-Think of AdalFlow to LLM applications and in-context learning is like PyTorch/TensorFlow/JAX for AI modeling.
-The goal is to provide basic and foudamental building blocks to build advanced applications with auto-optimization out-of-the-box.
-As we mature, we might see more RAG, memory-based chatbots, or agents frameworks will be built on top of AdalFlow building blocks such as retriever, generator.
+We tried to make the process simple and clear, but it can always improve.
+Share your suggestions on `Discord `_ or `Github Discussion `_.
-We highly suggest you to read our :ref:`design principle` before you start contributing.
-We only accept high quality contribution.
-We appreciate contributors but we have to hold our libary responsible for users.
-Once you decide to contribute, we hope you are not just to list your name on the repo, but more importantly, you learn and improve your own skills! you support your faviroty projects and community!
+Quick Start
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+1. The `Github issues `_ is the best place to find your first task. Look for tasks labeled `good first issue`.
+2. Follow the :doc:`./contribute_to_code` to set up your environment and start coding, testing, and documenting.
+3. Review the `PR & Review Process <#pr-review-process>`_ to complete the review and iteration process.
+ We aim to maximize both your learning and the library's quality.
+
+
+.. note::
+
+ You can use 👍 to show that you want a particular issue to be addressed.
+
+.. _part1-structuring:
+Part 1: Structuring
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+To dive deeper, we'll explain our repository structure, issue tracking, and label system.
+
+.. what to contribute(with examples), contributing steps with proposal/discussion/coding/testing/documentation/pr/review process.
+.. The coding and testing will be discussed more in details in `Code Contribution Guidelines <./contribute_to_code.html>`_ and the documentation will be discussed in `Documentation Contribution Guidelines <./contribute_to_document.html>`_.
+
+Repo Structure
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Our repository has a clear structure, divided into six subdirectories:
+
+.. code-block:: text
+
+ .
+ ├── .github/
+ ├── adalflow/
+ │   ├── adalflow/
+ │   ├── tests/
+ │   └── pyproject.toml
+ ├── docs/
+ │   └── pyproject.toml
+ ├── tutorials/
+ ├── use_cases/
+ ├── benchmarks/
+ ├── notebooks/
+ │   ├── tutorials/
+ │   ├── use_cases/
+ │   └── benchmarks/
+ ├── .env_example
+ ├── .gitignore
+ ├── .pre-commit-config.yaml
+ ├── LICENSE.md
+ ├── README.md
+ ├── poetry.lock
+ └── pyproject.toml
+
+1. The ``/adalflow`` directory contains the source code for the `AdalFlow` library, including its implementation, tests, and a dedicated `pyproject.toml` file.
+2. The ``/docs`` directory houses the documentation for the `AdalFlow` library and also includes its own `pyproject.toml` file.
+ We use `reStructuredText` for the documentation. For more details, please refer to `README.md `_ under the ``./docs`` directory.
+3. Additionally, it includes the following directories:
+
+ - ``/tutorials``: Contains tutorials for the `AdalFlow` library, for each core feature or class.
+ - ``/use_cases``: Covers various use cases, often end-to-end applications, even with auto-optimization.
+ - ``/benchmarks``: Includes benchmarks to evaluate the library with other libraries or frameworks.
+4. ``/notebooks`` directory contains all notebooks used across `tutorials`, `use_cases`, and `benchmarks`.
+
+
+
+Issue & Label System
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We use issues to manage bugs, features, and discussions.
+To organize them, we've carefully designed **15** labels.
+
+
+.. figure:: ../_static/images/adalflow_issues.png
+ :alt: AdalFlow Issues
+ :align: center
+ :width: 700px
+
+ **Type**: The type of the issue, such as bug, feature, or discussion.
+
+We use three categories of labels in parallel:
+
+* Type of issue: There are 7 types of issues.
+
+ - ``[adalflow]`` indicates the issue relates to `AdalFlow` source code in the ``/adalflow`` directory.
+ Within this directory, the ``/adalflow`` contains the source code, and the ``/tests`` contains test code.
+ 3 labels here include: ``integration``, ``improvement``, ``core feature``.
+
+ - We use ``bug`` for code-related issues across all directories.
+ - ``documentation`` for items in the ``/docs``, ``/tutorials``, ``/use_cases``, ``/benchmarks``, and ``/notebooks`` directories.
+ - ``new use cases/benchmarks`` for issues in ``/use_cases`` and ``/benchmarks``.
+ - ``question`` for general inquiries.
+
+* How to proceed: There are 4 labels.
+
+ - ``good first issue`` for tasks suitable for new contributors.
+ - ``wontfix`` for issues that won't be addressed in the library.
+ - ``duplicate`` for issues already tracked elsewhere.
+ - ``help wanted`` indicates priority signal, discussion, and pr are needed.
+
+* Priority: There are 3 levels.
+
+ - ``P0`` for the highest priority issues.
+ - ``P1`` for medium priority.
+ - ``P2`` for the lowest priority.
+
+
+.. note::
+ * All the above 14 labels can be used for both issues and PRs.
+ * ``ready-for-pr`` is exclusive to issues and indicates the issue is ready for a PR.
+ * ``ready-to-go`` is exclusive to PRs and indicates the PR is ready to be merged.
+
+The following table will provide a quick overview of them all.
+
+
+.. list-table:: Label overview
+ :header-rows: 1
+ :widths: 40 50 30
+
+ * - Type of issue (7 labels)
+ - How to proceed (4 labels)
+ - Priority (3 labels)
+ * - [adalflow] suggest integration
+ -
+ -
+ * - [adalflow] suggest improvement
+ - wontfix
+ - P0
+ * - [adalflow] suggest core feature
+ - good first issue
+ -
+ * - new use cases/benchmarks
+ - duplicate (aggregate) and close one
+ - P1
+ * - [adalflow] bug
+ - help wanted
+ - P2
+ * - question
+ - ``ready-for-pr``
+ -
+ * - documentation
+ - ``ready-to-go``
+ -
-It took us 3 months to setup a contributing guide, as we did explore with users and think a lot on how to organize labels and what is the best process that can control the quality of our library while leveraing the open-source community. **We will continously improve our process and we welcome any suggestion and advice.**
-We are determined to make AdalFlow as great and legendary as PyTorch.
+How to create an issue
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We've created five templates to make it easy for you to create an issue.
-.. ``LightRAG``'s contribution process is similar to most open source projects on GitHub. We encourage new project ideas and the communication between ``LightRAG`` team, developers and the broader community.
-.. Please don't forget to join us on `Discord `_.
+.. figure:: ../_static/images/adalflow_issue_template.png
+ :alt: AdalFlow Issue Template
+ :align: center
+ :width: 700px
-.. toctree::
- :maxdepth: 2
+ Five templates for creating issues
- contribution_process
- contribute_to_code
- contribute_to_document
+Each template automatically assigns relevant labels to the issue.
-Contribution Process
-----------------------------
-You are always welcomed to contribute even if you've never participated in open source project before.
-Here is the basic contribution process:
-Environment
+How to assign priority
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+While our team marks priorities based on our best judgment, we also welcome community input to help us prioritize issues.
+
+You can use 👍 to indicate how important a particular issue is to you.
+We'll consider the ``# of 👍 / time_period`` as an additional signal for setting priorities.
+
+
+
+Part 2: What to contribute
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-When contributing, please note:
-LightRAG separates the source code environment and documentation environment.
-* To activate the code environment, you should run ``poetry install`` and ``poetry shell`` under ``./lightrag``. The ``./lightrag/pyproject.toml`` contains the dependencies for the ``LightRAG`` package.
+This section provides more details on how each issue relates to our codebase.
+We'll include example PRs to help clarify these connections.
+
+The following table offers a quick overview.
+More details on each type of contribution in Part 3.
+
+.. list-table:: What to Contribute (by 7 Labels) and Example PRs
+ :header-rows: 1
+ :widths: 20 50 30
+
+ * - Label
+ - Contribution Suggestions
+ - Example Issue/PR
+ * - [adalflow] bug
+ - Fix bugs reported in issues, can relate to /adalflow code or /tutorials/use_cases/benchmarks.
+ - `Issue 134 `_ and `PR 135 `_
+ * - [adalflow] suggest integration
+ - Add new integrations with model inference SDKs (:ref:`model_client`) or database retrievers (:ref:`retriever`) or tools or other libraries/frameworks.
+ - `Ollama integration request `_ and `PR 97 `_. This often involves tests, tutorial, and documentation.
+ * - [adalflow] suggest improvement
+ - Enhance existing features for better performance or usability, can relate to /adalflow code or /tutorials/use_cases/benchmarks.
+ - `Stream the response request `_ and `PR 158 `_.
+ * - [adalflow] suggest core feature
+ - Develop **new** core functionalities in `/adalflow` directory, such as `text_splitter` or `memory`.
+ -
+ * - new use cases/benchmarks
+ - Design benchmarks or propose new use cases for `adalflow`.
+ -
+ * - documentation
+ - Improve existing documentation under `/docs` or `/notebooks` directories or create new documentation for existing code.
+ - `Issue 194 `_, `Issue 123 `_, and `PR 260 `_.
+ * - question
+ - Answer user queries or provide clarifications about the library.
+ -
-* To activate the documentation environment, you can run ``poetry install`` and ``poetry shell`` under ``.``. The ``./pyproject.toml`` controls documentation dependencies.
+.. _part3-contributing-steps:
-Find a direction to work on
+Part 3: Contributing Steps
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-The team builds ``LightRAG`` based on latest researches and product cases. But you might have your own task to apply ``LightRAG``.
-Therefore, you can extend ``LightRAG`` and add any new features you believe will solve yours or others' problems.
-If you don't have any idea yet, you can:
-* Check the `existing issues `_ and see if there is anyone you know how to fix or you'd love to fix.
+Once you know what you want to contribute, follow these steps to ensure the quality of your contribution:
+
+1. **Track it.** Create an issue if it doesn't already exist.
+2. **Learn enough context.** Read the relevant documentation and code inside and outside of AdalFlow.
+ This includes:
+
+ - :ref:`tutorials`
+ - :ref:`use_cases`
+ - :ref:`API references`
+ - tests within `/adalflow/tests` to understand everything you need to know.
+
+ This will help you understand everything necessary for your contribution.
+ We'll provide examples for each type of contribution in the next section.
+
+ - For integration, you need to know the relevant SDKs and APIs.
+ - For documentation, you need to know the structure of the documentation and the writing style.
+
+3. **Create a solution proposal and gather input.** Write your solution proposal in the issue comments.
+ Alternatively, you can use a publicly accessible tool like ``Google Doc`` or ``Colab`` to share your proposal.
+ The `AdalFlow` team and the community will review and provide feedback before you start coding.
+ The team and core contributors can label it as ``ready-for-pr`` when it is ready for a PR.
+ This step is especially crucial for complex features. You can also discuss your proposal on our `Discord `_.
+
+4. **Work on it.** Follow the `PR & Review Process <#pr-review-process>`_ to begin coding, testing, documenting, and reviewing.
+
+
+.. TODO: edit the remaining content
+
+Integrate a model_client
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+All existing :ref:`model clients` are located in the `/adalflow/adalflow/components/model_client` directory.
+The tutorial :ref:`model_client` will help you understand the base class and how it is designed.
+
+In general, `OpenAI SDK `_ is trending to be the industry standard.
+And you can measure your targeting SDKs by the difference between these two.
+But overall, the OpenAI integration consists of: coding, testing, documentation.
+
+Coding includes:
+
+1. A :ref:`OpenAIClient` class that inherits from the base class :ref:`ModelClient`.
+2. Add the `sdk package` as an optional package in the `adalflow/pyproject.toml` file.
+3. Add the `sdk package` as lazy import in the `adalflow/adalflow/utils/lazy_import.py` file.
+4. Call the lazy import in the `adalflow/adalflow/components/model_client/__init__.py` file.
+5. Import the new client in the `adalflow/adalflow/__init__.py` file so that we can call it directly from the `adalflow` package.
+
+Testing includes:
+
+1. Create a test file `test_XX_client.py` in the `adalflow/tests/` directory. You can use `test_openai_client.py` as a reference.
+2. Add the package to the `adalflow/pyproject.toml` file under the `[tool.poetry.group.test.dependencies]` section.
+3. Add the test case for the new client in the test file.
+4. Follow the `adalflow/tests/README.md` to run the test.
+
+Documentation includes:
+
+1. Add examples on how to use the new client in the `tutorials` directory. You can use `tutorials/ollama_client_note.py` as a reference.
+2. Make sure you add the new client package in the root `pyproject.toml` file under the `[tool.poetry.dependencies]` section.
+3. Ensure the API reference is correctly rendered in the `docs` directory.
+ For example, with `ollama_client`, you need to add the following line in the `docs/source/apis/components/index.rst` file:
+
+ .. code-block:: text
+
+ components.model_client.ollama_client
+
+4. Add example API configurations in the root `.env_example` file.
+
+
+This `ollama_client PR `_ is a good example of how to integrate a new model client.
+
+Integrate a database retriever
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+All existing :ref:`retrievers` are located in the `/adalflow/adalflow/components/retriever` directory.
+The tutorial :ref:`retriever` will help you understand the base class and how it is designed.
+
+The process is quite similar to integrating a model client. For documentation, ensure you add an example in :ref:`retriever`.
-* Join us on `Discord `_. We are glad to discuss with you and know what you are interested in here.
+This `qdrant_retriever PR `_ is a good example of how to integrate a new database retriever.
-Figure out the scope of your change
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-**Small:** Most of the pull requests are small. If your change is small, such as fixing a line of bug, please go ahead to push it.
+Add notebooks for existing/new tutorials/use_cases/benchmarks
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-**Big:** But if you are making a new feature, or planning to push a large change, it is recommended to contact us on `Discord `_ first.
+.. figure:: ../_static/images/adalflow_code_samples.png
+ :alt: Code samples for tutorials
-**Unknown:** If you have no idea how big it will be, we are here to help you. Please post your idea on `issues `_. We will read it carefully and get back to you.
+ :align: center
+ :width: 700px
-Add your code
+ Code samples for tutorials
+
+
+.. note::
+ For how to add a new notebook, please follow the `README.md `_ in the `notebooks` directory.
+
+**Tutorials**
+
+For :ref:`tutorials` in our documentation, each tutorial is accompanied by two code files: one `XX_note.py` in `/tutorials` and one `adalflow_XX.ipynb` in `/notebooks/tutorials`.
+You can help add the missing code file in tutorials and make sure to link them in the documentation like the above figure.
+Here is one example issue and PR for adding a new tutorial: `Issue 192 `_ and `PR 261 `_.
+
+**Use Cases**
+
+For :ref:`use_cases` in our documentation, each use case is accompanied by source code in `/use_cases` and a notebook in `/notebooks/use_cases`.
+For our existing use cases, we do not always have the corresponding notebooks. You can help add the missing notebooks for the existing use cases.
+
+**Benchmarks**
+
+So far, we are still working on the code in the `/benchmarks` directory. We will need help on adding the documentation along with the code.
+
+Part 4: Pull Request Process
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-Please check our `code contribution guidelines <./contribute_to_code.html>`_ to work with code.
-Pull requests
+Prepare the codebase
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Before you can start a pull request, you need to follow these steps and this `Github official fork guide `_:
+
+1. **Fork the repository.**
+ You can fork the repository by clicking the `Fork` button on the top right corner of the repository page.
+
+2. **Clone the repository.**
+ You can clone the repository by running the following command in your terminal:
+
+ .. code-block:: bash
+
+ git clone your_forked_repository_url
+
+3. **Sync your fork.**
+ Also, make sure your repository is in sync with the original owner's repository. You can do this by running the following commands:
+
+ .. code-block:: bash
+
+ git remote -v
+
+ You will not see our repo in the list. You can add it by running the following command:
+
+ .. code-block:: bash
+
+ git remote add upstream https://github.com/SylphAI-Inc/AdalFlow.git
+
+ Now, when you run `git remote -v`, you will see the upstream repo.
+ Then, we can sync your fork with the upstream repo by running the following commands:
+
+ .. code-block:: bash
+
+ git fetch upstream
+ git checkout main
+ git merge upstream/main
+
+ *Note: `fetch` will fetch the changes from the upstream repo, but it will not merge them into your local branch. `merge` will merge the changes from the upstream repo into your local branch.*
+ For more details, please refer to the `Github official syncing a fork guide `_.
+
+4. **Create a new branch.**
+ Create a new branch to ensure your changes are isolated from the main codebase. You can do this by running the following command:
+
+ .. code-block:: bash
+
+ git checkout -b _
+
+ Similarly, you always use step 3 to sync your branch with the upstream repo.
+ Additionally, you can use the following commands to sync:
+
+ .. code-block:: bash
+
+ git fetch --all --prune
+ git rebase upstream/main
+ # follow git instructions to resolve conflicts
+ git push origin your_branch_name
+
+
+Work on your PR
+~~~~~~~~~~~~~~~~~~~~
+
+1. **Set up the pre-commit hooks.**
+ We have a `.pre-commit-config.yaml` file in the root directory.
+ Ensure you have set up the pre-commit hooks. We recommend you to do so in the `poetry` environment.
+ The following steps will help you set up the root poetry environment and the pre-commit hooks:
+ Install `poetry` if you haven't already:
+
+ .. code-block:: bash
+
+ pip install poetry
+
+ You can install the dependencies by running the following command:
+
+ .. code-block:: bash
+
+ poetry install
+
+ Then you can activate the environment by running the following command:
+
+ .. code-block:: bash
+
+ poetry shell
+
+ Then, install the pre-commit hooks by running the following command:
+
+ .. code-block:: bash
+
+ pre-commit install
+
+ *Now, you can start to commit your changes from the `/adalflow` directory next time even if you are not in the poetry environment.*
+ If you have more questions, you can refer to the `pre-commit official guide `_.
+
+2. **Commit your changes.**
+ Once you have made your changes, you can commit them by running the following commands:
+
+ .. code-block:: bash
+
+ git add .
+ git commit -m "Your commit message"
+ git push origin your_branch_name
+
+ If you face "permission denied" issue, you can refer to this `medium blog `_ for help.
+
+3. **Create a Pull Request.**
+ Go to your forked repository on Github and click the `New Pull Request` button. Make sure you select the correct branch for the base and compare branches.
+ Here we have a default `PR template `_ for you to fill in.
+
+4. **Fill in the PR template.**
+ Make sure you fill in the PR template with the necessary information. This will help the reviewers understand your changes better.
+
+5. **Submit the PR**
+ We encourage you to submit the PR as soon as possible, even if it is not ready for review. You can mark it as a draft by:
+ 1. Clicking the `Draft` button on the PR page.
+ 2. Adding `[WIP]` to the PR title.
+
+ .. figure:: ../_static/images/pr_draft.png
+ :alt: Create a draft PR
+ :align: center
+ :width: 700px
+
+ Create a draft PR
+
+6. **Iterate your PR.**
+ Once you have submitted the PR, the reviewers will review your changes and provide feedback. You can iterate your PR by making the necessary changes and pushing them to your branch. The reviewers will review your changes again.
+
+7. **Merge your PR.**
+ Once your PR is approved, the reviewers will merge your PR for you. You can also merge your PR by clicking the `Merge` button on the PR page.
+
+
+
+Part 5: Review Process
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-**WIP PR:** If you are working on an in pull request that is not ready for review, you can create a PR with **"[WIP]"** to inform us that this PR is a draft **βwork in progressβ**.
+For now, we will use the `PyTorch lightning's review guideline `_.
+
+.. Environment
+.. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+.. When contributing, please note:
+.. LightRAG separates the source code environment and documentation environment.
+
+.. * To activate the code environment, you should run ``poetry install`` and ``poetry shell`` under ``./lightrag``. The ``./lightrag/pyproject.toml`` contains the dependencies for the ``LightRAG`` package.
+
+.. * To activate the documentation environment, you can run ``poetry install`` and ``poetry shell`` under ``.``. The ``./pyproject.toml`` controls documentation dependencies.
-**Finished PR:** You can name your finished PR as **"[New Retriever Integration]"** for example.
-We will carry out code review regularly and provide feedbacks as soon as possible.
-Please iterate your PR with the feedbacks. We will try our best to reduce the revision workload on your side.
-Once your PR is approved, we will merge the PR for you.
-If you have any concerns about our feedbacks, please feel free to contact us on `Discord `_.
-Writing Documentation
-----------------------------
-It is a good practice to submit your code with documentations to help the ``LightRAG`` team and other developers better understand your updates.
-Please see our `documentation contribution guidelines <./contribute_to_document.html>`_ for more details on ``LightRAG`` documentation standard.
-.. admonition:: Resources
- :class: highlight
+.. .. admonition:: Resources
+.. :class: highlight
diff --git a/docs/source/contributor/index.rst b/docs/source/contributor/index.rst
index efd71355c..7e2c2da0b 100644
--- a/docs/source/contributor/index.rst
+++ b/docs/source/contributor/index.rst
@@ -1,27 +1,34 @@
-Contributing
+Contributor Guide
=============================
-Welcome to the AdalFlow community! We're building the most user-friendly, modular library for building and auto-optimizing LLM applications, from Chatbots, RAGs, to Agents.
-Think of AdalFlow to LLM applications and in-context learning is like PyTorch/TensorFlow/JAX for AI modeling.
-The goal is to provide basic and foudamental building blocks to build advanced applications with auto-optimization out-of-the-box.
-As we mature, we might see more RAG, memory-based chatbots, or agents frameworks will be built on top of AdalFlow building blocks such as retriever, generator.
+Welcome to the AdalFlow community! We're building the most user-friendly, modular, and powerful library for building and auto-optimizing LLM applications, from Chatbots and RAGs to Agents.
+*Think of AdalFlow for LLM applications and prompt engineering as the PyTorch/TensorFlow/JAX equivalent for AI modeling.*
-We highly suggest you to read our :ref:`design principle` before you start contributing.
+The goal of the library is to provide basic and fundamental building blocks to create advanced applications with auto-optimization out of the box.
+As we mature, we anticipate that more RAG, memory-based chatbots, or agent frameworks will be built on top of AdalFlow's building blocks, such as `retriever` and `generator`.
+We highly suggest you read our :ref:`design principle` before you start contributing.
-We only accept high quality contribution.
-We appreciate contributors but we have to hold our libary responsible for users.
-Once you decide to contribute, we hope you are not just to list your name on the repo, but more importantly, you learn and improve your own skills! you support your faviroty projects and community!
+We greatly appreciate all contributions, from bug fixes to new features, and value every contributor.
+However, we must be selective to ensure our library remains reliable for users.
+We hope your contributions go beyond listing your name on the repo—our goal is for you to learn, grow your skills, support your favorite projects, and give back to the community!
-It took us 3 months to setup a contributing guide, as we did explore with users and think a lot on how to organize labels and what is the best process that can control the quality of our library while leveraing the open-source community. **We will continously improve our process and we welcome any suggestion and advice.**
-We are determined to make AdalFlow as great and legendary as PyTorch.
+The goal of this guide is to design the best process for maintaining the quality of our library while enabling the community to make meaningful contributions.
+It took us three months to set up this contributor guide, as we first tested the process with early contributors.
+*We are determined to make AdalFlow as great and legendary as PyTorch.*
-The contributing guide includes three parts:
+This guide covers the overall contributing process, along with development essentials for environment setup, coding, testing, and documentation.
+
+Here's to the future of LLM applications!
+
+By `Li Yin `_.
+
+.. TODO: add a relation to the whole ecosystem
.. toctree::
+ :caption: Contributor Guide
:maxdepth: 1
contribution
contribute_to_code
- contribute_to_document
.. version_control
diff --git a/docs/source/contributor/version_control.rst b/docs/source/contributor/version_control.rst
index 456c7528b..30c2480ec 100644
--- a/docs/source/contributor/version_control.rst
+++ b/docs/source/contributor/version_control.rst
@@ -7,7 +7,7 @@ Overview
--------
**The version will mainly be managed by the LightRAG team. But we are glad to share how we will release the latest version here.**
-This guide outlines the process for releasing a new version of ``LightRAG``.
+This guide outlines the process for releasing a new version of ``LightRAG``.
The workflow pipeline validates the version tag, builds the package, runs tests, publishes to PyPI, and creates a release on GitHub. The workflow is triggered by tags pushed to the **Release** branch. See `GitHub tags `_ for more details on version release tagging.
Steps to Release a New Version
@@ -18,7 +18,7 @@ Steps to Release a New Version
[tool.poetry]
name = "lightrag"
-
+
version = "0.0.0-rc.1"
description = "The 'PyTorch' library for LLM applications. RAG=Retriever-Agent-Generator."
@@ -49,7 +49,7 @@ Steps to Release a New Version
git add lightrag/pyproject.toml
git commit -m "new version release"
git push origin release
-
+
Since the workflow only processes **tags**, your file submission will not go through the version release workflow.
Only the tags you pushed will get checked.
@@ -66,7 +66,7 @@ Steps to Release a New Version
.. code-block:: python
git tags # list the existing tags
-
+
git tag -d
git push origin --delete
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 18d762c3b..5f2653570 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -318,14 +318,6 @@ We are building a library that unites the two worlds, forming a healthy LLM appl
use_cases/index
-.. toctree::
- :glob:
- :maxdepth: 1
- :hidden:
-
- apis/index
-
-
.. :caption: Benchmarks
.. Manually add documents for the code in benchmarks
@@ -346,3 +338,12 @@ We are building a library that unites the two worlds, forming a healthy LLM appl
:hidden:
contributor/index
+
+
+
+.. toctree::
+ :glob:
+ :maxdepth: 1
+ :hidden:
+
+ apis/index
diff --git a/docs/source/integrations/index.rst b/docs/source/integrations/index.rst
new file mode 100644
index 000000000..313c2f1e2
--- /dev/null
+++ b/docs/source/integrations/index.rst
@@ -0,0 +1 @@
+The integration will be listing all existing integrations and integrations we plan to add.
diff --git a/docs/source/resources/resources.rst b/docs/source/resources/resources.rst
index 08f77d359..4affa68eb 100644
--- a/docs/source/resources/resources.rst
+++ b/docs/source/resources/resources.rst
@@ -3,7 +3,3 @@ Resources
Please check the GitHub for more information:
`GitHub repository `_
-
-
-
-
diff --git a/docs/source/tutorials/base_data_class.rst b/docs/source/tutorials/base_data_class.rst
index f1f1b2437..578782f26 100644
--- a/docs/source/tutorials/base_data_class.rst
+++ b/docs/source/tutorials/base_data_class.rst
@@ -7,7 +7,10 @@
-
+
+
+ Open Source Code
+
DataClass
diff --git a/docs/source/tutorials/component.rst b/docs/source/tutorials/component.rst
index a9e4ae377..649f5310d 100644
--- a/docs/source/tutorials/component.rst
+++ b/docs/source/tutorials/component.rst
@@ -1,7 +1,7 @@
.. raw:: html
-
+
diff --git a/docs/source/tutorials/text_splitter.rst b/docs/source/tutorials/text_splitter.rst
index 60541dff0..4e7da43a8 100644
--- a/docs/source/tutorials/text_splitter.rst
+++ b/docs/source/tutorials/text_splitter.rst
@@ -1,3 +1,15 @@
+.. raw:: html
+
+
+
.. _tutorials-text_splitter:
diff --git a/notebooks/README.md b/notebooks/README.md
index 2e1d4f1cc..3d2e6e94f 100644
--- a/notebooks/README.md
+++ b/notebooks/README.md
@@ -19,6 +19,11 @@ The template consists of three parts:
2. Content section of your notebook. Link to Next that users can look at.
3. Issues and Feedback.
+## If you want to use a ikernel in .ipynb to test notebooks
+
+You can use the following command to install the kernel at the root of the project:
+
+```poetry run python -m ipykernel install --user --name my-project-kernel```
## If you need to use dev api
diff --git a/notebooks/qas/adalflow_object_count_auto_optimization.ipynb b/notebooks/qas/adalflow_object_count_auto_optimization.ipynb
index 65b8509c1..ac7e3cbf5 100644
--- a/notebooks/qas/adalflow_object_count_auto_optimization.ipynb
+++ b/notebooks/qas/adalflow_object_count_auto_optimization.ipynb
@@ -1,8121 +1,8120 @@
{
- "cells": [
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "VVSOpjzJl_cx"
+ },
+ "source": [
+ "# π€ Welcome to AdalFlow!\n",
+ "## The library to build & auto-optimize any LLM task pipelines\n",
+ "\n",
+ "Thanks for trying us out, we're here to provide you with the best LLM application development experience you can dream of π any questions or concerns you may have, [come talk to us on discord,](https://discord.gg/ezzszrRZvT) we're always here to help!\n",
+ "\n",
+ "\n",
+ "# Quick Links\n",
+ "\n",
+ "Github repo: https://github.com/SylphAI-Inc/AdalFlow\n",
+ "\n",
+ "Full Tutorials: https://adalflow.sylph.ai/index.html#.\n",
+ "\n",
+ "Deep dive on each API: check out the [developer notes](https://adalflow.sylph.ai/tutorials/index.html).\n",
+ "\n",
+ "Common use cases along with the auto-optimization: check out [Use cases](https://adalflow.sylph.ai/use_cases/index.html).\n",
+ "\n",
+ "# Outline\n",
+ "\n",
+ "*Note: As training can consume tokens fast, and the notebook runtime will reset everytime you use, it might be better for you to learn training in your local editor.*\n",
+ "\n",
+ "This is a quick introduction of AdalFlow on question answering use case end to end\n",
+ "\n",
+ "* Trainable Task pipeline with trainable parameters\n",
+ "* Create AdalComponent for your task pipeline\n",
+ "* Use Trainer to diagnose, debug, and to train.\n",
+ "\n",
+ "You can find all source code here: https://github.com/SylphAI-Inc/AdalFlow/tree/main/use_cases/question_answering/bhh_object_count\n",
+ "\n",
+ "**Here is the more detailed tutorial for the code here: https://adalflow.sylph.ai/use_cases/question_answering.html**\n",
+ "\n",
+ "\n",
+ "# Installation\n",
+ "\n",
+ "1. Use `pip` to install the `adalflow` Python package. We will need `openai`, `groq`, and `faiss`(cpu version) from the extra packages.\n",
+ "\n",
+ " ```bash\n",
+ " pip install adalflow[openai,groq,faiss-cpu]\n",
+ " ```\n",
+ "2. Setup `openai` and `groq` API key in the environment variables"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "THTvmhjgfiHE"
+ },
+ "outputs": [],
+ "source": [
+ "from IPython.display import clear_output\n",
+ "\n",
+ "!pip install -U adalflow[openai,groq,datasets]\n",
+ "\n",
+ "clear_output()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 35
+ },
+ "id": "nJteJKsNrpcu",
+ "outputId": "d9f7b4d0-d11c-480d-d858-bf9022c18998"
+ },
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "VVSOpjzJl_cx"
+ "data": {
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "string"
},
- "source": [
- "# π€ Welcome to AdalFlow!\n",
- "## The library to build & auto-optimize any LLM task pipelines\n",
- "\n",
- "Thanks for trying us out, we're here to provide you with the best LLM application development experience you can dream of π any questions or concerns you may have, [come talk to us on discord,](https://discord.gg/ezzszrRZvT) we're always here to help!\n",
- "\n",
- "\n",
- "# Quick Links\n",
- "\n",
- "Github repo: https://github.com/SylphAI-Inc/AdalFlow\n",
- "\n",
- "Full Tutorials: https://adalflow.sylph.ai/index.html#.\n",
- "\n",
- "Deep dive on each API: check out the [developer notes](https://adalflow.sylph.ai/tutorials/index.html).\n",
- "\n",
- "Common use cases along with the auto-optimization: check out [Use cases](https://adalflow.sylph.ai/use_cases/index.html).\n",
- "\n",
- "# Outline\n",
- "\n",
- "*Note: As training can consume tokens fast, and the notebook runtime will reset everytime you use, it might be better for you to learn training in your local editor.*\n",
- "\n",
- "This is a quick introduction of AdalFlow on question answering use case end to end\n",
- "\n",
- "* Trainable Task pipeline with trainable parameters\n",
- "* Create AdalComponent for your task pipeline\n",
- "* Use Trainer to diagnose, debug, and to train.\n",
- "\n",
- "You can find all source code here: https://github.com/SylphAI-Inc/AdalFlow/tree/main/use_cases/question_answering/bhh_object_count\n",
- "\n",
- "**Here is the more detailed tutorial for the code here: https://adalflow.sylph.ai/use_cases/question_answering.html**\n",
- "\n",
- "\n",
- "# Installation\n",
- "\n",
- "1. Use `pip` to install the `adalflow` Python package. We will need `openai`, `groq`, and `faiss`(cpu version) from the extra packages.\n",
- "\n",
- " ```bash\n",
- " pip install adalflow[openai,groq,faiss-cpu]\n",
- " ```\n",
- "2. Setup `openai` and `groq` API key in the environment variables"
+ "text/plain": [
+ "'0.2.0'"
]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import adalflow as adal\n",
+ "\n",
+ "adal.__version__"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "KapUyHMM07pJ"
+ },
+ "source": [
+ "## Set Environment Variables\n",
+ "\n",
+ "Run the following code and pass your api key.\n",
+ "\n",
+ "Note: for normal `.py` projects, follow our [official installation guide](https://lightrag.sylph.ai/get_started/installation.html).\n",
+ "\n",
+ "*Go to [OpenAI](https://platform.openai.com/docs/introduction) and [Groq](https://console.groq.com/docs/) to get API keys if you don't already have.*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
+ "id": "ONfzF9Puzdd_",
+ "outputId": "6a815e21-ab99-463e-c53b-e39ca2ce8f3f"
+ },
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "THTvmhjgfiHE"
- },
- "outputs": [],
- "source": [
- "from IPython.display import clear_output\n",
- "\n",
- "!pip install -U adalflow[openai,groq,datasets]\n",
- "\n",
- "clear_output()"
- ]
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Please enter your OpenAI API key: Β·Β·Β·Β·Β·Β·Β·Β·Β·Β·\n",
+ "Please enter your GROQ API key: Β·Β·Β·Β·Β·Β·Β·Β·Β·Β·\n",
+ "API keys have been set.\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "\n",
+ "from getpass import getpass\n",
+ "\n",
+ "# Prompt user to enter their API keys securely\n",
+ "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n",
+ "groq_api_key = getpass(\"Please enter your GROQ API key, simplly press Enter if you don't have one: \")\n",
+ "\n",
+ "\n",
+ "# Set environment variables\n",
+ "os.environ['OPENAI_API_KEY'] = openai_api_key\n",
+ "os.environ['GROQ_API_KEY'] = groq_api_key\n",
+ "\n",
+ "print(\"API keys have been set.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "SfGS7iddtfpj"
+ },
+ "source": [
+ "\n",
+ "\n",
+ "# π Trainable Task Pipeline\n",
+ "\n",
+ "We will create a task pipeline consists of a generator, with a customzied template, a customized output parser.\n",
+ "\n",
+ "Different from our other pipelines where the `prompt_kwargs` values are strings, but here we will use ``Parameter``. And we will set up two parameter, one is of ``ParameterType.PROMPT`` and the other of type ``ParameterType.DEMOS``. The first one will be trained by text-grad and the second will be trained by boostrap few shot optimizer.\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "nHnvAbO-pXUq"
+ },
+ "outputs": [],
+ "source": [
+ "import adalflow as adal\n",
+ "import re\n",
+ "from typing import Dict, Union\n",
+ "import adalflow as adal\n",
+ "from adalflow.optim.types import ParameterType\n",
+ "\n",
+ "\n",
+ "@adal.fun_to_component\n",
+ "def parse_integer_answer(answer: str):\n",
+ " \"\"\"A function that parses the last integer from a string using regular expressions.\"\"\"\n",
+ " try:\n",
+ " # Use regular expression to find all sequences of digits\n",
+ " numbers = re.findall(r\"\\d+\", answer)\n",
+ " if numbers:\n",
+ " # Get the last number found\n",
+ " answer = int(numbers[-1])\n",
+ " else:\n",
+ " answer = -1\n",
+ " except ValueError:\n",
+ " answer = -1\n",
+ "\n",
+ " return answer\n",
+ "\n",
+ "\n",
+ "few_shot_template = r\"\"\"\n",
+ "{{system_prompt}}\n",
+ "{# Few shot demos #}\n",
+ "{% if few_shot_demos is not none %}\n",
+ "Here are some examples:\n",
+ "{{few_shot_demos}}\n",
+ "{% endif %}\n",
+ "\n",
+ "\n",
+ "{{input_str}}\n",
+ "\n",
+ "\"\"\"\n",
+ "\n",
+ "class ObjectCountTaskPipeline(adal.Component):\n",
+ " def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):\n",
+ " super().__init__()\n",
+ "\n",
+ " system_prompt = adal.Parameter(\n",
+ " data=\"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\",\n",
+ " role_desc=\"To give task instruction to the language model in the system prompt\",\n",
+ " requires_opt=True,\n",
+ " param_type=ParameterType.PROMPT,\n",
+ " )\n",
+ " few_shot_demos = adal.Parameter(\n",
+ " data=None,\n",
+ " role_desc=\"To provide few shot demos to the language model\",\n",
+ " requires_opt=True, # Changed to True for few-shot learning\n",
+ " param_type=ParameterType.DEMOS,\n",
+ " )\n",
+ "\n",
+ " self.llm_counter = adal.Generator(\n",
+ " model_client=model_client,\n",
+ " model_kwargs=model_kwargs,\n",
+ " template=few_shot_template,\n",
+ " prompt_kwargs={\n",
+ " \"system_prompt\": system_prompt,\n",
+ " \"few_shot_demos\": few_shot_demos,\n",
+ " },\n",
+ " output_processors=parse_integer_answer,\n",
+ " use_cache=True,\n",
+ " )\n",
+ "\n",
+ " def call(\n",
+ " self, question: str, id: str = None\n",
+ " ) -> Union[adal.GeneratorOutput, adal.Parameter]:\n",
+ " output = self.llm_counter(prompt_kwargs={\"input_str\": question}, id=id)\n",
+ " return output\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "AvZJjdzZa0cT"
+ },
+ "source": [
+ "Next, we will run this pipeline in both train and eval mode.\n",
+ "\n",
+ "#### Eval mode with GeneratorOutput\n",
+ "\n",
+ "Eval mode will output ``GeneratorOutput``.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Gks3yS8hcR6_"
+ },
+ "source": [
+ "\n",
+ "#### Train mode with different form of output\n",
+ "\n",
+ "Train mode will return ``Parameter``, where the `data` field will be the `raw_response`` from the GeneratorOutput, and we put the full GeneratorOutput at the ``full_response`` in the parameter.\n",
+ "\n",
+ "As the `data` field of the `Parameter` directly communicate with the Optimizer, which are an LLM itself, its better than they understand exactly the string response itself instead of the parsed one.\n",
+ "\n",
+ "Later you will see that we also use ``eval_input`` of the parameter to communicate with the `LossFunction` as that need the parsed final output."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
+ "id": "eqQSFnZOpfWJ",
+ "outputId": "05b5fc83-09d1-45f4-aacc-6d460fbdd7bd"
+ },
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 35
- },
- "id": "nJteJKsNrpcu",
- "outputId": "d9f7b4d0-d11c-480d-d858-bf9022c18998"
- },
- "outputs": [
- {
- "data": {
- "application/vnd.google.colaboratory.intrinsic+json": {
- "type": "string"
- },
- "text/plain": [
- "'0.2.0'"
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "import adalflow as adal\n",
- "\n",
- "adal.__version__"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "WARNING:adalflow.core.generator:Error copying the prompt_kwargs: 'prompt' is not a valid ParameterType\n"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "KapUyHMM07pJ"
- },
- "source": [
- "## Set Environment Variables\n",
- "\n",
- "Run the following code and pass your api key.\n",
- "\n",
- "Note: for normal `.py` projects, follow our [official installation guide](https://lightrag.sylph.ai/get_started/installation.html).\n",
- "\n",
- "*Go to [OpenAI](https://platform.openai.com/docs/introduction) and [Groq](https://console.groq.com/docs/) to get API keys if you don't already have.*"
- ]
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-3.5-turbo.db\n",
+ "ObjectCountTaskPipeline(\n",
+ " (llm_counter): Generator(\n",
+ " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
+ " (prompt): Prompt(\n",
+ " template: \n",
+ " {{system_prompt}}\n",
+ " {# Few shot demos #}\n",
+ " {% if few_shot_demos is not none %}\n",
+ " Here are some examples:\n",
+ " {{few_shot_demos}}\n",
+ " {% endif %}\n",
+ " \n",
+ " \n",
+ " {{input_str}}\n",
+ " \n",
+ " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
+ " )\n",
+ " (model_client): OpenAIClient()\n",
+ " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
+ " )\n",
+ ")\n"
+ ]
+ }
+ ],
+ "source": [
+ "from adalflow.components.model_client.openai_client import OpenAIClient\n",
+ "from adalflow.components.model_client.groq_client import GroqAPIClient\n",
+ "\n",
+ "\n",
+ "if len(os.environ['OPENAI_API_KEY']) > 1:\n",
+ " gpt_3_model = {\n",
+ " \"model_client\": OpenAIClient(),\n",
+ " \"model_kwargs\": {\n",
+ " \"model\": \"gpt-3.5-turbo\",\n",
+ " \"max_tokens\": 2000,\n",
+ " \"temperature\": 0.0,\n",
+ " \"top_p\": 0.99,\n",
+ " \"frequency_penalty\": 0,\n",
+ " \"presence_penalty\": 0,\n",
+ " \"stop\": None,\n",
+ " },\n",
+ " }\n",
+ " gpt_4o_model = {\n",
+ " \"model_client\": OpenAIClient(),\n",
+ " \"model_kwargs\": {\n",
+ " \"model\": \"gpt-4o\",\n",
+ " \"max_tokens\": 4000,\n",
+ " \"temperature\": 0.0,\n",
+ " \"top_p\": 0.99,\n",
+ " \"frequency_penalty\": 0,\n",
+ " \"presence_penalty\": 0,\n",
+ " \"stop\": None,\n",
+ " },\n",
+ " }\n",
+ "\n",
+ "if len(os.environ['GROQ_API_KEY']) > 1:\n",
+ " llama_3_1_model ={\n",
+ " \"model_client\": GroqAPIClient(),\n",
+ " \"model_kwargs\": {\n",
+ " \"model\": \"llama-3.1-8b-instant\"\n",
+ " }\n",
+ " }\n",
+ "\n",
+ "\n",
+ "question = \"I have a flute, a piano, a trombone, four stoves, a violin, an accordion, a clarinet, a drum, two lamps, and a trumpet. How many musical instruments do I have?\"\n",
+ "task_pipeline = ObjectCountTaskPipeline(**gpt_3_model)\n",
+ "print(task_pipeline)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
+ "id": "DE1xNdYvcXw8",
+ "outputId": "25844c2a-5d4c-4c68-8ca5-38b79ca5b398"
+ },
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "ONfzF9Puzdd_",
- "outputId": "6a815e21-ab99-463e-c53b-e39ca2ce8f3f"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Please enter your OpenAI API key: Β·Β·Β·Β·Β·Β·Β·Β·Β·Β·\n",
- "Please enter your GROQ API key: Β·Β·Β·Β·Β·Β·Β·Β·Β·Β·\n",
- "API keys have been set.\n"
- ]
- }
- ],
- "source": [
- "import os\n",
- "\n",
- "from getpass import getpass\n",
- "\n",
- "# Prompt user to enter their API keys securely\n",
- "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n",
- "groq_api_key = getpass(\"Please enter your GROQ API key, simplly press Enter if you don't have one: \")\n",
- "\n",
- "\n",
- "# Set environment variables\n",
- "os.environ['OPENAI_API_KEY'] = openai_api_key\n",
- "os.environ['GROQ_API_KEY'] = groq_api_key\n",
- "\n",
- "print(\"API keys have been set.\")"
- ]
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "GeneratorOutput(id='1', data=8, error=None, usage=CompletionUsage(completion_tokens=77, prompt_tokens=113, total_tokens=190), raw_response='To find the total number of musical instruments you have, you simply need to count the individual instruments you listed. \\n\\nYou have:\\n- Flute\\n- Piano\\n- Trombone\\n- Violin\\n- Accordion\\n- Clarinet\\n- Drum\\n- Trumpet\\n\\nCounting each of these instruments, we get a total of 8 musical instruments.\\n\\nAnswer: 8', metadata=None)\n"
+ ]
+ }
+ ],
+ "source": [
+ "answer = task_pipeline(question, id=\"1\")\n",
+ "print(answer)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
+ "id": "AGUlUsGxcaby",
+ "outputId": "8c8588fe-2994-4d9e-c2d1-26453141f43f"
+ },
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "SfGS7iddtfpj"
- },
- "source": [
- "\n",
- "\n",
- "# π Trainable Task Pipeline\n",
- "\n",
- "We will create a task pipeline consists of a generator, with a customzied template, a customized output parser.\n",
- "\n",
- "Different from our other pipelines where the `prompt_kwargs` values are strings, but here we will use ``Parameter``. And we will set up two parameter, one is of ``ParameterType.PROMPT`` and the other of type ``ParameterType.DEMOS``. The first one will be trained by text-grad and the second will be trained by boostrap few shot optimizer.\n",
- "\n",
- "\n"
- ]
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Parameter(name=Generator_output, requires_opt=True, param_type=generator_output (The output of the generator.), role_desc=Output from (llm) Generator, data=To find the total number of musical instruments you have, you simply need to count the individual instruments you listed. \n",
+ "\n",
+ "You have:\n",
+ "- Flute\n",
+ "- Piano\n",
+ "- Trombone\n",
+ "- Violin\n",
+ "- Accordion\n",
+ "- Clarinet\n",
+ "- Drum\n",
+ "- Trumpet\n",
+ "\n",
+ "Counting each of these instruments, we get a total of 8 musical instruments.\n",
+ "\n",
+ "Answer: 8, predecessors={Parameter(name=To_provide, requires_opt=True, param_type=demos (A few examples to guide the language model.), role_desc=To provide few shot demos to the language model, data=None, predecessors=set(), gradients=[], raw_response=None, input_args=None, traces={}), Parameter(name=To_give_ta, requires_opt=True, param_type=prompt (Instruction to the language model on task, data, and format.), role_desc=To give task instruction to the language model in the system prompt, data=You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value., predecessors=set(), gradients=[], raw_response=None, input_args=None, traces={})}, gradients=[], raw_response=None, input_args={'prompt_kwargs': {'system_prompt': Parameter(name=To_give_ta, requires_opt=True, param_type=prompt (Instruction to the language model on task, data, and format.), role_desc=To give task instruction to the language model in the system prompt, data=You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value., predecessors=set(), gradients=[], raw_response=None, input_args=None, traces={}), 'few_shot_demos': Parameter(name=To_provide, requires_opt=True, param_type=demos (A few examples to guide the language model.), role_desc=To provide few shot demos to the language model, data=None, predecessors=set(), gradients=[], raw_response=None, input_args=None, traces={}), 'input_str': 'I have a flute, a piano, a trombone, four stoves, a violin, an accordion, a clarinet, a drum, two lamps, and a trumpet. How many musical instruments do I have?'}, 'model_kwargs': {'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, traces={})\n",
+ "full_response: GeneratorOutput(id=None, data=8, error=None, usage=CompletionUsage(completion_tokens=77, prompt_tokens=113, total_tokens=190), raw_response='To find the total number of musical instruments you have, you simply need to count the individual instruments you listed. \\n\\nYou have:\\n- Flute\\n- Piano\\n- Trombone\\n- Violin\\n- Accordion\\n- Clarinet\\n- Drum\\n- Trumpet\\n\\nCounting each of these instruments, we get a total of 8 musical instruments.\\n\\nAnswer: 8', metadata=None)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# set it to train mode\n",
+ "task_pipeline.train()\n",
+ "answer = task_pipeline(question, id=\"1\")\n",
+ "print(answer)\n",
+ "print(f\"full_response: {answer.full_response}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "YDAiuFzcr4YA"
+ },
+ "outputs": [],
+ "source": [
+ "!pip install datasets\n",
+ "clear_output()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "-Gvfcy2IcgWx"
+ },
+ "source": [
+ "### Load Datasets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "AYBIGsIHpjMe"
+ },
+ "outputs": [],
+ "source": [
+ "from adalflow.datasets.big_bench_hard import BigBenchHard\n",
+ "from adalflow.utils.data import subset_dataset\n",
+ "\n",
+ "def load_datasets(max_samples: int = None):\n",
+ " \"\"\"Load the dataset\"\"\"\n",
+ " train_data = BigBenchHard(split=\"train\")\n",
+ " val_data = BigBenchHard(split=\"val\")\n",
+ " test_data = BigBenchHard(split=\"test\")\n",
+ "\n",
+ " # Limit the number of samples\n",
+ " if max_samples:\n",
+ " train_data = subset_dataset(train_data, max_samples)\n",
+ " val_data = subset_dataset(val_data, max_samples)\n",
+ " test_data = subset_dataset(test_data, max_samples)\n",
+ "\n",
+ " return train_data, val_data, test_data\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
+ "id": "asw-pJrid8ly",
+ "outputId": "31807c34-0de9-45e5-ebdd-778aa5313802"
+ },
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "nHnvAbO-pXUq"
- },
- "outputs": [],
- "source": [
- "import adalflow as adal\n",
- "import re\n",
- "from typing import Dict, Union\n",
- "import adalflow as adal\n",
- "from adalflow.optim.types import ParameterType\n",
- "\n",
- "\n",
- "@adal.fun_to_component\n",
- "def parse_integer_answer(answer: str):\n",
- " \"\"\"A function that parses the last integer from a string using regular expressions.\"\"\"\n",
- " try:\n",
- " # Use regular expression to find all sequences of digits\n",
- " numbers = re.findall(r\"\\d+\", answer)\n",
- " if numbers:\n",
- " # Get the last number found\n",
- " answer = int(numbers[-1])\n",
- " else:\n",
- " answer = -1\n",
- " except ValueError:\n",
- " answer = -1\n",
- "\n",
- " return answer\n",
- "\n",
- "\n",
- "few_shot_template = r\"\"\"\n",
- "{{system_prompt}}\n",
- "{# Few shot demos #}\n",
- "{% if few_shot_demos is not none %}\n",
- "Here are some examples:\n",
- "{{few_shot_demos}}\n",
- "{% endif %}\n",
- "\n",
- "\n",
- "{{input_str}}\n",
- "\n",
- "\"\"\"\n",
- "\n",
- "class ObjectCountTaskPipeline(adal.Component):\n",
- " def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):\n",
- " super().__init__()\n",
- "\n",
- " system_prompt = adal.Parameter(\n",
- " data=\"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\",\n",
- " role_desc=\"To give task instruction to the language model in the system prompt\",\n",
- " requires_opt=True,\n",
- " param_type=ParameterType.PROMPT,\n",
- " )\n",
- " few_shot_demos = adal.Parameter(\n",
- " data=None,\n",
- " role_desc=\"To provide few shot demos to the language model\",\n",
- " requires_opt=True, # Changed to True for few-shot learning\n",
- " param_type=ParameterType.DEMOS,\n",
- " )\n",
- "\n",
- " self.llm_counter = adal.Generator(\n",
- " model_client=model_client,\n",
- " model_kwargs=model_kwargs,\n",
- " template=few_shot_template,\n",
- " prompt_kwargs={\n",
- " \"system_prompt\": system_prompt,\n",
- " \"few_shot_demos\": few_shot_demos,\n",
- " },\n",
- " output_processors=parse_integer_answer,\n",
- " use_cache=True,\n",
- " )\n",
- "\n",
- " def call(\n",
- " self, question: str, id: str = None\n",
- " ) -> Union[adal.GeneratorOutput, adal.Parameter]:\n",
- " output = self.llm_counter(prompt_kwargs={\"input_str\": question}, id=id)\n",
- " return output\n",
- "\n",
- "\n"
- ]
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Example(id='d3f33ded-170a-4b87-9b0b-987d5fb7b817', question='I have a cauliflower, a stalk of celery, a cabbage, and a garlic. How many vegetables do I have?', answer='4')\n"
+ ]
+ }
+ ],
+ "source": [
+ "# check the datasets\n",
+ "\n",
+ "train_data, val_data, test_data = load_datasets(max_samples=2)\n",
+ "print(train_data[0])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "VAVtXE9xeEHt"
+ },
+ "source": [
+ "### Soft link to AdalFlow default file path\n",
+ "\n",
+ "Lets' match the default to the current project, so that you can see the downloaded data and later the checkpoints of the training."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "1SaKH6dkeWus"
+ },
+ "outputs": [],
+ "source": [
+ "! ln -s /root/.adalflow /content/adalflow\n",
+ "\n",
+ "# go to files then you will see a folder named as adalflow"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "YWZzOvAHenME"
+ },
+ "source": [
+ "# π AdalComponent to define everything we need to train\n",
+ "\n",
+ "1. We need `backward_engine_model_config`` for ``backward_engine`` to compute gradient.\n",
+ "\n",
+ "2. We need ``text_optimizer_model_config`` for the `text optimizer` for propose new prompts.\n",
+ "\n",
+ "3. For the demo optimizer, we need a `teacher_model_config` to config a teacher generator, in this case, it is the `llm_counter`. The teacher will share the same prompt with the `llm_counter` but you can use a more advanced model.\n",
+ "\n",
+ "In general, we should have all of these parts to use a more advanced model."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "9QoNoMWD0rgV"
+ },
+ "source": [
+ "## π§ Diagnose\n",
+ "\n",
+ "Diagnose is more of an evaluation, but with detailed logs so that you can manually inspect the wrong output.\n",
+ "\n",
+ "This one shows the minimum config you need to get the `diagnose` work."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "6mi7lM3U24Eg"
+ },
+ "outputs": [],
+ "source": [
+ "from adalflow.datasets.types import Example\n",
+ "from adalflow.eval.answer_match_acc import AnswerMatchAcc\n",
+ "\n",
+ "\n",
+ "class ObjectCountAdalComponent(adal.AdalComponent):\n",
+ " def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):\n",
+ " task = ObjectCountTaskPipeline(model_client, model_kwargs)\n",
+ " eval_fn = AnswerMatchAcc(type=\"exact_match\").compute_single_item\n",
+ " super().__init__(task=task, eval_fn=eval_fn)\n",
+ "\n",
+ " def prepare_task(self, sample: Example):\n",
+ " return self.task.call, {\"question\": sample.question, \"id\": sample.id}\n",
+ "\n",
+ " def prepare_eval(\n",
+ " self, sample: Example, y_pred: adal.GeneratorOutput\n",
+ " ) -> float:\n",
+ " y_label = -1\n",
+ " if (y_pred is not None and y_pred.data is not None): # if y_pred and y_pred.data: might introduce bug when the data is 0\n",
+ " y_label = y_pred.data\n",
+ " return self.eval_fn, {\"y\": y_label, \"y_gt\": sample.answer}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "eliPeVeM2wcP"
+ },
+ "outputs": [],
+ "source": [
+ "def diagnose(\n",
+ " model_client: adal.ModelClient,\n",
+ " model_kwargs: Dict,\n",
+ ") -> Dict:\n",
+ "\n",
+ " trainset, valset, testset = load_datasets()\n",
+ " # use max_samples=10 to test the code\n",
+ "\n",
+ " adal_component = ObjectCountAdalComponent(model_client, model_kwargs)\n",
+ " trainer = adal.Trainer(adaltask=adal_component)\n",
+ " trainer.diagnose(dataset=trainset, split=\"train\")\n",
+ " trainer.diagnose(dataset=valset, split=\"val\")\n",
+ " trainer.diagnose(dataset=testset, split=\"test\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
+ "id": "nKl9clcb3dFj",
+ "outputId": "676fbb96-c70b-40ab-ea15-93ade1aa9e66"
+ },
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "AvZJjdzZa0cT"
- },
- "source": [
- "Next, we will run this pipeline in both train and eval mode.\n",
- "\n",
- "#### Eval mode with GeneratorOutput\n",
- "\n",
- "Eval mode will output ``GeneratorOutput``.\n",
- "\n"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "WARNING:adalflow.core.generator:Error copying the prompt_kwargs: 'prompt' is not a valid ParameterType\n"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "Gks3yS8hcR6_"
- },
- "source": [
- "\n",
- "#### Train mode with different form of output\n",
- "\n",
- "Train mode will return ``Parameter``, where the `data` field will be the `raw_response`` from the GeneratorOutput, and we put the full GeneratorOutput at the ``full_response`` in the parameter.\n",
- "\n",
- "As the `data` field of the `Parameter` directly communicate with the Optimizer, which are an LLM itself, its better than they understand exactly the string response itself instead of the parsed one.\n",
- "\n",
- "Later you will see that we also use ``eval_input`` of the parameter to communicate with the `LossFunction` as that need the parsed final output."
- ]
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-3.5-turbo.db\n",
+ "Checkpoint path: /root/.adalflow/ckpt/ObjectCountAdalComponent\n",
+ "Save diagnose to /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_train\n",
+ "Saving traces to /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_train\n",
+ "all_generators: [('llm_counter', Generator(\n",
+ " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
+ " (prompt): Prompt(\n",
+ " template: \n",
+ " {{system_prompt}}\n",
+ " {# Few shot demos #}\n",
+ " {% if few_shot_demos is not none %}\n",
+ " Here are some examples:\n",
+ " {{few_shot_demos}}\n",
+ " {% endif %}\n",
+ " \n",
+ " \n",
+ " {{input_str}}\n",
+ " \n",
+ " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
+ " )\n",
+ " (model_client): OpenAIClient()\n",
+ " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
+ "))]\n",
+ "Registered callback for llm_counter, file path: /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_train/llm_counter_call.jsonl\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "eqQSFnZOpfWJ",
- "outputId": "05b5fc83-09d1-45f4-aacc-6d460fbdd7bd"
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "WARNING:adalflow.core.generator:Error copying the prompt_kwargs: 'prompt' is not a valid ParameterType\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-3.5-turbo.db\n",
- "ObjectCountTaskPipeline(\n",
- " (llm_counter): Generator(\n",
- " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- " )\n",
- ")\n"
- ]
- }
- ],
- "source": [
- "from adalflow.components.model_client.openai_client import OpenAIClient\n",
- "from adalflow.components.model_client.groq_client import GroqAPIClient\n",
- "\n",
- "\n",
- "if len(os.environ['OPENAI_API_KEY']) > 1:\n",
- " gpt_3_model = {\n",
- " \"model_client\": OpenAIClient(),\n",
- " \"model_kwargs\": {\n",
- " \"model\": \"gpt-3.5-turbo\",\n",
- " \"max_tokens\": 2000,\n",
- " \"temperature\": 0.0,\n",
- " \"top_p\": 0.99,\n",
- " \"frequency_penalty\": 0,\n",
- " \"presence_penalty\": 0,\n",
- " \"stop\": None,\n",
- " },\n",
- " }\n",
- " gpt_4o_model = {\n",
- " \"model_client\": OpenAIClient(),\n",
- " \"model_kwargs\": {\n",
- " \"model\": \"gpt-4o\",\n",
- " \"max_tokens\": 4000,\n",
- " \"temperature\": 0.0,\n",
- " \"top_p\": 0.99,\n",
- " \"frequency_penalty\": 0,\n",
- " \"presence_penalty\": 0,\n",
- " \"stop\": None,\n",
- " },\n",
- " }\n",
- "\n",
- "if len(os.environ['GROQ_API_KEY']) > 1:\n",
- " llama_3_1_model ={\n",
- " \"model_client\": GroqAPIClient(),\n",
- " \"model_kwargs\": {\n",
- " \"model\": \"llama-3.1-8b-instant\"\n",
- " }\n",
- " }\n",
- "\n",
- "\n",
- "question = \"I have a flute, a piano, a trombone, four stoves, a violin, an accordion, a clarinet, a drum, two lamps, and a trumpet. How many musical instruments do I have?\"\n",
- "task_pipeline = ObjectCountTaskPipeline(**gpt_3_model)\n",
- "print(task_pipeline)\n"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 5957.82it/s]\n",
+ "Evaluating step(0): 0.88 across 50 samples, Max potential: 0.88: 100%|ββββββββββ| 50/50 [00:15<00:00, 3.27it/s]\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "DE1xNdYvcXw8",
- "outputId": "25844c2a-5d4c-4c68-8ca5-38b79ca5b398"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "GeneratorOutput(id='1', data=8, error=None, usage=CompletionUsage(completion_tokens=77, prompt_tokens=113, total_tokens=190), raw_response='To find the total number of musical instruments you have, you simply need to count the individual instruments you listed. \\n\\nYou have:\\n- Flute\\n- Piano\\n- Trombone\\n- Violin\\n- Accordion\\n- Clarinet\\n- Drum\\n- Trumpet\\n\\nCounting each of these instruments, we get a total of 8 musical instruments.\\n\\nAnswer: 8', metadata=None)\n"
- ]
- }
- ],
- "source": [
- "answer = task_pipeline(question, id=\"1\")\n",
- "print(answer)"
- ]
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "sorted_indices: [8, 16, 23, 25, 31, 47, 0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 24, 26, 27, 28, 29, 30, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49]\n",
+ "sorted_scores: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]\n",
+ "Loading log file: llm_counter_call.jsonl\n",
+ "Total error samples: 6\n",
+ "Save diagnose to /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_val\n",
+ "Saving traces to /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_val\n",
+ "all_generators: [('llm_counter', Generator(\n",
+ " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
+ " (prompt): Prompt(\n",
+ " template: \n",
+ " {{system_prompt}}\n",
+ " {# Few shot demos #}\n",
+ " {% if few_shot_demos is not none %}\n",
+ " Here are some examples:\n",
+ " {{few_shot_demos}}\n",
+ " {% endif %}\n",
+ " \n",
+ " \n",
+ " {{input_str}}\n",
+ " \n",
+ " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
+ " )\n",
+ " (model_client): OpenAIClient()\n",
+ " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
+ "))]\n",
+ "Registered callback for llm_counter, file path: /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_val/llm_counter_call.jsonl\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "AGUlUsGxcaby",
- "outputId": "8c8588fe-2994-4d9e-c2d1-26453141f43f"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Parameter(name=Generator_output, requires_opt=True, param_type=generator_output (The output of the generator.), role_desc=Output from (llm) Generator, data=To find the total number of musical instruments you have, you simply need to count the individual instruments you listed. \n",
- "\n",
- "You have:\n",
- "- Flute\n",
- "- Piano\n",
- "- Trombone\n",
- "- Violin\n",
- "- Accordion\n",
- "- Clarinet\n",
- "- Drum\n",
- "- Trumpet\n",
- "\n",
- "Counting each of these instruments, we get a total of 8 musical instruments.\n",
- "\n",
- "Answer: 8, predecessors={Parameter(name=To_provide, requires_opt=True, param_type=demos (A few examples to guide the language model.), role_desc=To provide few shot demos to the language model, data=None, predecessors=set(), gradients=[], raw_response=None, input_args=None, traces={}), Parameter(name=To_give_ta, requires_opt=True, param_type=prompt (Instruction to the language model on task, data, and format.), role_desc=To give task instruction to the language model in the system prompt, data=You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value., predecessors=set(), gradients=[], raw_response=None, input_args=None, traces={})}, gradients=[], raw_response=None, input_args={'prompt_kwargs': {'system_prompt': Parameter(name=To_give_ta, requires_opt=True, param_type=prompt (Instruction to the language model on task, data, and format.), role_desc=To give task instruction to the language model in the system prompt, data=You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value., predecessors=set(), gradients=[], raw_response=None, input_args=None, traces={}), 'few_shot_demos': Parameter(name=To_provide, requires_opt=True, param_type=demos (A few examples to guide the language model.), role_desc=To provide few shot demos to the language model, data=None, predecessors=set(), gradients=[], raw_response=None, input_args=None, traces={}), 'input_str': 'I have a flute, a piano, a trombone, four stoves, a violin, an accordion, a clarinet, a drum, two lamps, and a trumpet. How many musical instruments do I have?'}, 'model_kwargs': {'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, traces={})\n",
- "full_response: GeneratorOutput(id=None, data=8, error=None, usage=CompletionUsage(completion_tokens=77, prompt_tokens=113, total_tokens=190), raw_response='To find the total number of musical instruments you have, you simply need to count the individual instruments you listed. \\n\\nYou have:\\n- Flute\\n- Piano\\n- Trombone\\n- Violin\\n- Accordion\\n- Clarinet\\n- Drum\\n- Trumpet\\n\\nCounting each of these instruments, we get a total of 8 musical instruments.\\n\\nAnswer: 8', metadata=None)\n"
- ]
- }
- ],
- "source": [
- "# set it to train mode\n",
- "task_pipeline.train()\n",
- "answer = task_pipeline(question, id=\"1\")\n",
- "print(answer)\n",
- "print(f\"full_response: {answer.full_response}\")"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 3203.76it/s]\n",
+ "Evaluating step(0): 0.8 across 50 samples, Max potential: 0.8: 100%|ββββββββββ| 50/50 [00:15<00:00, 3.26it/s]\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "YDAiuFzcr4YA"
- },
- "outputs": [],
- "source": [
- "!pip install datasets\n",
- "clear_output()"
- ]
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "sorted_indices: [1, 2, 5, 10, 24, 36, 38, 42, 44, 47, 0, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 39, 40, 41, 43, 45, 46, 48, 49]\n",
+ "sorted_scores: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]\n",
+ "Loading log file: llm_counter_call.jsonl\n",
+ "Total error samples: 10\n",
+ "Save diagnose to /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_test\n",
+ "Saving traces to /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_test\n",
+ "all_generators: [('llm_counter', Generator(\n",
+ " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
+ " (prompt): Prompt(\n",
+ " template: \n",
+ " {{system_prompt}}\n",
+ " {# Few shot demos #}\n",
+ " {% if few_shot_demos is not none %}\n",
+ " Here are some examples:\n",
+ " {{few_shot_demos}}\n",
+ " {% endif %}\n",
+ " \n",
+ " \n",
+ " {{input_str}}\n",
+ " \n",
+ " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
+ " )\n",
+ " (model_client): OpenAIClient()\n",
+ " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
+ "))]\n",
+ "Registered callback for llm_counter, file path: /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_test/llm_counter_call.jsonl\n"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "-Gvfcy2IcgWx"
- },
- "source": [
- "### Load Datasets"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Loading Data: 100%|ββββββββββ| 100/100 [00:00<00:00, 5545.09it/s]\n",
+ "Evaluating step(0): 0.83 across 100 samples, Max potential: 0.83: 100%|ββββββββββ| 100/100 [00:28<00:00, 3.50it/s]"
+ ]
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "AYBIGsIHpjMe"
- },
- "outputs": [],
- "source": [
- "from adalflow.datasets.big_bench_hard import BigBenchHard\n",
- "from adalflow.utils.data import subset_dataset\n",
- "\n",
- "def load_datasets(max_samples: int = None):\n",
- " \"\"\"Load the dataset\"\"\"\n",
- " train_data = BigBenchHard(split=\"train\")\n",
- " val_data = BigBenchHard(split=\"val\")\n",
- " test_data = BigBenchHard(split=\"test\")\n",
- "\n",
- " # Limit the number of samples\n",
- " if max_samples:\n",
- " train_data = subset_dataset(train_data, max_samples)\n",
- " val_data = subset_dataset(val_data, max_samples)\n",
- " test_data = subset_dataset(test_data, max_samples)\n",
- "\n",
- " return train_data, val_data, test_data\n"
- ]
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "sorted_indices: [7, 18, 19, 20, 23, 24, 25, 43, 58, 59, 63, 74, 75, 79, 85, 97, 99, 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 60, 61, 62, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 76, 77, 78, 80, 81, 82, 83, 84, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 98]\n",
+ "sorted_scores: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]\n",
+ "Loading log file: llm_counter_call.jsonl\n",
+ "Total error samples: 17\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "asw-pJrid8ly",
- "outputId": "31807c34-0de9-45e5-ebdd-778aa5313802"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Example(id='d3f33ded-170a-4b87-9b0b-987d5fb7b817', question='I have a cauliflower, a stalk of celery, a cabbage, and a garlic. How many vegetables do I have?', answer='4')\n"
- ]
- }
- ],
- "source": [
- "# check the datasets\n",
- "\n",
- "train_data, val_data, test_data = load_datasets(max_samples=2)\n",
- "print(train_data[0])"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "diagnose(**gpt_3_model)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "dSu4VQri3y3D"
+ },
+ "source": [
+ "Now, you can go to `/content/adalflow/ckpt/ObjectCountAdalComponent/diagnose_train/stats.json` to view the average score for each split. And also the `diagnose.json` for different errors.\n",
+ "\n",
+ "Here is the overall score for each split.\n",
+ "\n",
+ "| Train | Val| Test |\n",
+ "|:--------- |:--------:| ---------:|\n",
+ "| 0.88 | 0.8 | 0.83 |\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "1vzJyp-W0z7I"
+ },
+ "source": [
+ "## π Debug"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "TmlCvJu804dJ"
+ },
+ "source": [
+ "## β
Train\n",
+ "\n",
+ "Now, let's start training."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "4TWCn0did6-K"
+ },
+ "outputs": [],
+ "source": [
+ "from adalflow.datasets.types import Example\n",
+ "\n",
+ "\n",
+ "class ObjectCountAdalComponent(adal.AdalComponent):# noqa: F811\n",
+ " def __init__(\n",
+ " self,\n",
+ " model_client: adal.ModelClient,\n",
+ " model_kwargs: Dict,\n",
+ " backward_engine_model_config: Dict,\n",
+ " teacher_model_config: Dict,\n",
+ " text_optimizer_model_config: Dict,\n",
+ " ):\n",
+ " task = ObjectCountTaskPipeline(model_client, model_kwargs)\n",
+ " eval_fn = AnswerMatchAcc(type=\"exact_match\").compute_single_item\n",
+ " loss_fn = adal.EvalFnToTextLoss(\n",
+ " eval_fn=eval_fn,\n",
+ " eval_fn_desc=\"exact_match: 1 if str(y) == str(y_gt) else 0\",\n",
+ " )\n",
+ " super().__init__(task=task, eval_fn=eval_fn, loss_fn=loss_fn)\n",
+ "\n",
+ " self.backward_engine_model_config = backward_engine_model_config\n",
+ " self.teacher_model_config = teacher_model_config\n",
+ " self.text_optimizer_model_config = text_optimizer_model_config\n",
+ "\n",
+ " def prepare_task(self, sample: Example):\n",
+ " return self.task.call, {\"question\": sample.question, \"id\": sample.id}\n",
+ "\n",
+ "\n",
+ " def prepare_eval(\n",
+ " self, sample: Example, y_pred: adal.GeneratorOutput\n",
+ " ) -> float:\n",
+ " y_label = -1\n",
+ " if (y_pred is not None and y_pred.data is not None): # if y_pred and y_pred.data: might introduce bug when the data is 0\n",
+ " y_label = y_pred.data\n",
+ " return self.eval_fn, {\"y\": y_label, \"y_gt\": sample.answer}\n",
+ "\n",
+ " def prepare_loss(self, sample: Example, pred: adal.Parameter):\n",
+ " # prepare gt parameter\n",
+ " y_gt = adal.Parameter(\n",
+ " name=\"y_gt\",\n",
+ " data=sample.answer,\n",
+ " eval_input=sample.answer,\n",
+ " requires_opt=False,\n",
+ " )\n",
+ "\n",
+ " # pred's full_response is the output of the task pipeline which is GeneratorOutput\n",
+ " pred.eval_input = pred.full_response.data\n",
+ " return self.loss_fn, {\"kwargs\": {\"y\": pred, \"y_gt\": y_gt}}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "dezwX2yn1eQS"
+ },
+ "outputs": [],
+ "source": [
+ "def train(\n",
+ " train_batch_size=4, # larger batch size is not that effective, probably because of llm's lost in the middle\n",
+ " raw_shots: int = 0,\n",
+ " bootstrap_shots: int = 1,\n",
+ " max_steps=1,\n",
+ " num_workers=4,\n",
+ " strategy=\"random\",\n",
+ " optimization_order=\"sequential\",\n",
+ " debug=False,\n",
+ " resume_from_ckpt=None,\n",
+ " exclude_input_fields_from_bootstrap_demos=False,\n",
+ "):\n",
+ " adal_component = ObjectCountAdalComponent(\n",
+ " **gpt_3_model,\n",
+ " teacher_model_config=gpt_4o_model,\n",
+ " text_optimizer_model_config=gpt_4o_model,\n",
+ " backward_engine_model_config=gpt_4o_model\n",
+ " )\n",
+ " print(adal_component)\n",
+ " trainer = adal.Trainer(\n",
+ " train_batch_size=train_batch_size,\n",
+ " adaltask=adal_component,\n",
+ " strategy=strategy,\n",
+ " max_steps=max_steps,\n",
+ " num_workers=num_workers,\n",
+ " raw_shots=raw_shots,\n",
+ " bootstrap_shots=bootstrap_shots,\n",
+ " debug=debug,\n",
+ " weighted_sampling=True,\n",
+ " optimization_order=optimization_order,\n",
+ " exclude_input_fields_from_bootstrap_demos=exclude_input_fields_from_bootstrap_demos,\n",
+ " )\n",
+ " print(trainer)\n",
+ "\n",
+ " train_dataset, val_dataset, test_dataset = load_datasets()\n",
+ " trainer.fit(\n",
+ " train_dataset=train_dataset,\n",
+ " val_dataset=val_dataset,\n",
+ " test_dataset=test_dataset,\n",
+ " debug=debug,\n",
+ " resume_from_ckpt=resume_from_ckpt,\n",
+ " )\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "NGKYozGt60Pp"
+ },
+ "source": [
+ "We use `Sequential` in default, we will end up with 24 steps in total, 12 for text optimizer and 12 for the demo optimizer."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
+ "id": "yDwLwL0L7Rsw",
+ "outputId": "1b7e413b-a1d3-4388-fc0c-ca4b1c072585"
+ },
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "VAVtXE9xeEHt"
- },
- "source": [
- "### Soft link to AdalFlow default file path\n",
- "\n",
- "Lets' match the default to the current project, so that you can see the downloaded data and later the checkpoints of the training."
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "WARNING:adalflow.core.generator:Error copying the prompt_kwargs: 'prompt' is not a valid ParameterType\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "1SaKH6dkeWus"
- },
- "outputs": [],
- "source": [
- "! ln -s /root/.adalflow /content/adalflow\n",
- "\n",
- "# go to files then you will see a folder named as adalflow"
- ]
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-3.5-turbo.db\n",
+ "ObjectCountAdalComponent(\n",
+ " eval_fn: compute_single_item, backward_engine: None, backward_engine_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, teacher_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, text_optimizer_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}\n",
+ " (task): ObjectCountTaskPipeline(\n",
+ " (llm_counter): Generator(\n",
+ " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
+ " (prompt): Prompt(\n",
+ " template: \n",
+ " {{system_prompt}}\n",
+ " {# Few shot demos #}\n",
+ " {% if few_shot_demos is not none %}\n",
+ " Here are some examples:\n",
+ " {{few_shot_demos}}\n",
+ " {% endif %}\n",
+ " \n",
+ " \n",
+ " {{input_str}}\n",
+ " \n",
+ " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
+ " )\n",
+ " (model_client): OpenAIClient()\n",
+ " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
+ " )\n",
+ " )\n",
+ " (loss_fn): EvalFnToTextLoss()\n",
+ ")\n",
+ "Trainer(\n",
+ " (adaltask): ObjectCountAdalComponent(\n",
+ " eval_fn: compute_single_item, backward_engine: None, backward_engine_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, teacher_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, text_optimizer_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}\n",
+ " (task): ObjectCountTaskPipeline(\n",
+ " (llm_counter): Generator(\n",
+ " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
+ " (prompt): Prompt(\n",
+ " template: \n",
+ " {{system_prompt}}\n",
+ " {# Few shot demos #}\n",
+ " {% if few_shot_demos is not none %}\n",
+ " Here are some examples:\n",
+ " {{few_shot_demos}}\n",
+ " {% endif %}\n",
+ " \n",
+ " \n",
+ " {{input_str}}\n",
+ " \n",
+ " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
+ " )\n",
+ " (model_client): OpenAIClient()\n",
+ " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
+ " )\n",
+ " )\n",
+ " (loss_fn): EvalFnToTextLoss()\n",
+ " )\n",
+ ")\n",
+ "raw_shots: 0, bootstrap_shots: 1\n",
+ "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n",
+ "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n",
+ "Configuring teacher generator for Generator(\n",
+ " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
+ " (prompt): Prompt(\n",
+ " template: \n",
+ " {{system_prompt}}\n",
+ " {# Few shot demos #}\n",
+ " {% if few_shot_demos is not none %}\n",
+ " Here are some examples:\n",
+ " {{few_shot_demos}}\n",
+ " {% endif %}\n",
+ " \n",
+ " \n",
+ " {{input_str}}\n",
+ " \n",
+ " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
+ " )\n",
+ " (model_client): OpenAIClient()\n",
+ " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
+ ")\n",
+ "Teacher generator set: Generator(\n",
+ " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
+ " (prompt): Prompt(\n",
+ " template: \n",
+ " {{system_prompt}}\n",
+ " {# Few shot demos #}\n",
+ " {% if few_shot_demos is not none %}\n",
+ " Here are some examples:\n",
+ " {{few_shot_demos}}\n",
+ " {% endif %}\n",
+ " \n",
+ " \n",
+ " {{input_str}}\n",
+ " \n",
+ " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
+ " )\n",
+ " (model_client): OpenAIClient()\n",
+ " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
+ "), teacher Generator(\n",
+ " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
+ " (prompt): Prompt(\n",
+ " template: \n",
+ " {{system_prompt}}\n",
+ " {# Few shot demos #}\n",
+ " {% if few_shot_demos is not none %}\n",
+ " Here are some examples:\n",
+ " {{few_shot_demos}}\n",
+ " {% endif %}\n",
+ " \n",
+ " \n",
+ " {{input_str}}\n",
+ " \n",
+ " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
+ " )\n",
+ " (model_client): OpenAIClient()\n",
+ " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
+ ")\n",
+ "Teacher generator configured.\n",
+ "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n",
+ "Backward engine configured for all generators.\n"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "YWZzOvAHenME"
- },
- "source": [
- "# π AdalComponent to define everything we need to train\n",
- "\n",
- "1. We need `backward_engine_model_config`` for ``backward_engine`` to compute gradient.\n",
- "\n",
- "2. We need ``text_optimizer_model_config`` for the `text optimizer` for propose new prompts.\n",
- "\n",
- "3. For the demo optimizer, we need a `teacher_model_config` to config a teacher generator, in this case, it is the `llm_counter`. The teacher will share the same prompt with the `llm_counter` but you can use a more advanced model.\n",
- "\n",
- "In general, we should have all of these parts to use a more advanced model."
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 6482.70it/s]\n",
+ "Evaluating step(0): 0.8 across 50 samples, Max potential: 0.8: 100%|ββββββββββ| 50/50 [00:00<00:00, 347.01it/s]\n",
+ "Loading Data: 100%|ββββββββββ| 100/100 [00:00<00:00, 2017.67it/s]\n",
+ "Evaluating step(0): 0.83 across 100 samples, Max potential: 0.83: 100%|ββββββββββ| 100/100 [00:00<00:00, 286.59it/s]\n"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "9QoNoMWD0rgV"
- },
- "source": [
- "## π§ Diagnose\n",
- "\n",
- "Diagnose is more of an evaluation, but with detailed logs so that you can manually inspect the wrong output.\n",
- "\n",
- "This one shows the minimum config you need to get the `diagnose` work."
- ]
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Initial validation score: 0.8\n",
+ "Initial test score: 0.83\n",
+ "Checkpoint path: /root/.adalflow/ckpt/ObjectCountAdalComponent\n",
+ "save to /root/.adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "6mi7lM3U24Eg"
- },
- "outputs": [],
- "source": [
- "from adalflow.datasets.types import Example\n",
- "from adalflow.eval.answer_match_acc import AnswerMatchAcc\n",
- "\n",
- "\n",
- "class ObjectCountAdalComponent(adal.AdalComponent):\n",
- " def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):\n",
- " task = ObjectCountTaskPipeline(model_client, model_kwargs)\n",
- " eval_fn = AnswerMatchAcc(type=\"exact_match\").compute_single_item\n",
- " super().__init__(task=task, eval_fn=eval_fn)\n",
- "\n",
- " def prepare_task(self, sample: Example):\n",
- " return self.task.call, {\"question\": sample.question, \"id\": sample.id}\n",
- "\n",
- " def prepare_eval(\n",
- " self, sample: Example, y_pred: adal.GeneratorOutput\n",
- " ) -> float:\n",
- " y_label = -1\n",
- " if (y_pred is not None and y_pred.data is not None): # if y_pred and y_pred.data: might introduce bug when the data is 0\n",
- " y_label = y_pred.data\n",
- " return self.eval_fn, {\"y\": y_label, \"y_gt\": sample.answer}"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Training Step: 1: 0%| | 0/13 [00:00, ?it/s]\n",
+ "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 133.39it/s]\n",
+ "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 1489.32it/s]\n",
+ "\n",
+ "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 262.46it/s]\n",
+ "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 12865.96it/s]\n",
+ "Training Step: 2: 8%|β | 1/13 [00:00<00:01, 8.29it/s]"
+ ]
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "eliPeVeM2wcP"
- },
- "outputs": [],
- "source": [
- "def diagnose(\n",
- " model_client: adal.ModelClient,\n",
- " model_kwargs: Dict,\n",
- ") -> Dict:\n",
- "\n",
- " trainset, valset, testset = load_datasets()\n",
- " # use max_samples=10 to test the code\n",
- "\n",
- " adal_component = ObjectCountAdalComponent(model_client, model_kwargs)\n",
- " trainer = adal.Trainer(adaltask=adal_component)\n",
- " trainer.diagnose(dataset=trainset, split=\"train\")\n",
- " trainer.diagnose(dataset=valset, split=\"val\")\n",
- " trainer.diagnose(dataset=testset, split=\"test\")"
- ]
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Skipping batch 0 as acc: 1.0\n",
+ "No proposal can improve the subset and full set, go to next step\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "nKl9clcb3dFj",
- "outputId": "676fbb96-c70b-40ab-ea15-93ade1aa9e66"
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "WARNING:adalflow.core.generator:Error copying the prompt_kwargs: 'prompt' is not a valid ParameterType\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-3.5-turbo.db\n",
- "Checkpoint path: /root/.adalflow/ckpt/ObjectCountAdalComponent\n",
- "Save diagnose to /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_train\n",
- "Saving traces to /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_train\n",
- "all_generators: [('llm_counter', Generator(\n",
- " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- "))]\n",
- "Registered callback for llm_counter, file path: /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_train/llm_counter_call.jsonl\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 5957.82it/s]\n",
- "Evaluating step(0): 0.88 across 50 samples, Max potential: 0.88: 100%|ββββββββββ| 50/50 [00:15<00:00, 3.27it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sorted_indices: [8, 16, 23, 25, 31, 47, 0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 24, 26, 27, 28, 29, 30, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49]\n",
- "sorted_scores: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]\n",
- "Loading log file: llm_counter_call.jsonl\n",
- "Total error samples: 6\n",
- "Save diagnose to /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_val\n",
- "Saving traces to /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_val\n",
- "all_generators: [('llm_counter', Generator(\n",
- " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- "))]\n",
- "Registered callback for llm_counter, file path: /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_val/llm_counter_call.jsonl\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 3203.76it/s]\n",
- "Evaluating step(0): 0.8 across 50 samples, Max potential: 0.8: 100%|ββββββββββ| 50/50 [00:15<00:00, 3.26it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sorted_indices: [1, 2, 5, 10, 24, 36, 38, 42, 44, 47, 0, 3, 4, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 39, 40, 41, 43, 45, 46, 48, 49]\n",
- "sorted_scores: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]\n",
- "Loading log file: llm_counter_call.jsonl\n",
- "Total error samples: 10\n",
- "Save diagnose to /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_test\n",
- "Saving traces to /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_test\n",
- "all_generators: [('llm_counter', Generator(\n",
- " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- "))]\n",
- "Registered callback for llm_counter, file path: /root/.adalflow/ckpt/ObjectCountAdalComponent/diagnose_test/llm_counter_call.jsonl\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Loading Data: 100%|ββββββββββ| 100/100 [00:00<00:00, 5545.09it/s]\n",
- "Evaluating step(0): 0.83 across 100 samples, Max potential: 0.83: 100%|ββββββββββ| 100/100 [00:28<00:00, 3.50it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sorted_indices: [7, 18, 19, 20, 23, 24, 25, 43, 58, 59, 63, 74, 75, 79, 85, 97, 99, 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 60, 61, 62, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 76, 77, 78, 80, 81, 82, 83, 84, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 98]\n",
- "sorted_scores: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]\n",
- "Loading log file: llm_counter_call.jsonl\n",
- "Total error samples: 17\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n"
- ]
- }
- ],
- "source": [
- "diagnose(**gpt_3_model)"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 384.73it/s]\n",
+ "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 927.64it/s]\n",
+ "\n",
+ "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 754.71it/s]\n",
+ "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 12087.33it/s]\n",
+ "Training Step: 3: 15%|ββ | 2/13 [00:00<00:01, 8.92it/s]"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "dSu4VQri3y3D"
- },
- "source": [
- "Now, you can go to `/content/adalflow/ckpt/ObjectCountAdalComponent/diagnose_train/stats.json` to view the average score for each split. And also the `diagnose.json` for different errors.\n",
- "\n",
- "Here is the overall score for each split.\n",
- "\n",
- "| Train | Val| Test |\n",
- "|:--------- |:--------:| ---------:|\n",
- "| 0.88 | 0.8 | 0.83 |\n",
- "\n"
- ]
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Skipping batch 1 as acc: 1.0\n",
+ "No proposal can improve the subset and full set, go to next step\n"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "1vzJyp-W0z7I"
- },
- "source": [
- "## π Debug"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 193.44it/s]\n",
+ "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 2761.68it/s]\n",
+ "\n",
+ "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 810.38it/s]\n",
+ "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 11320.66it/s]\n",
+ "Training Step: 4: 15%|ββ | 2/13 [00:00<00:01, 8.92it/s]"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "TmlCvJu804dJ"
- },
- "source": [
- "## β
Train\n",
- "\n",
- "Now, let's start training."
- ]
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Skipping batch 2 as acc: 1.0\n",
+ "No proposal can improve the subset and full set, go to next step\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "4TWCn0did6-K"
- },
- "outputs": [],
- "source": [
- "from adalflow.datasets.types import Example\n",
- "from adalflow.eval.answer_match_acc import AnswerMatchAcc\n",
- "\n",
- "\n",
- "class ObjectCountAdalComponent(adal.AdalComponent):\n",
- " def __init__(\n",
- " self,\n",
- " model_client: adal.ModelClient,\n",
- " model_kwargs: Dict,\n",
- " backward_engine_model_config: Dict,\n",
- " teacher_model_config: Dict,\n",
- " text_optimizer_model_config: Dict,\n",
- " ):\n",
- " task = ObjectCountTaskPipeline(model_client, model_kwargs)\n",
- " eval_fn = AnswerMatchAcc(type=\"exact_match\").compute_single_item\n",
- " loss_fn = adal.EvalFnToTextLoss(\n",
- " eval_fn=eval_fn,\n",
- " eval_fn_desc=\"exact_match: 1 if str(y) == str(y_gt) else 0\",\n",
- " )\n",
- " super().__init__(task=task, eval_fn=eval_fn, loss_fn=loss_fn)\n",
- "\n",
- " self.backward_engine_model_config = backward_engine_model_config\n",
- " self.teacher_model_config = teacher_model_config\n",
- " self.text_optimizer_model_config = text_optimizer_model_config\n",
- "\n",
- " def prepare_task(self, sample: Example):\n",
- " return self.task.call, {\"question\": sample.question, \"id\": sample.id}\n",
- "\n",
- "\n",
- " def prepare_eval(\n",
- " self, sample: Example, y_pred: adal.GeneratorOutput\n",
- " ) -> float:\n",
- " y_label = -1\n",
- " if (y_pred is not None and y_pred.data is not None): # if y_pred and y_pred.data: might introduce bug when the data is 0\n",
- " y_label = y_pred.data\n",
- " return self.eval_fn, {\"y\": y_label, \"y_gt\": sample.answer}\n",
- "\n",
- " def prepare_loss(self, sample: Example, pred: adal.Parameter):\n",
- " # prepare gt parameter\n",
- " y_gt = adal.Parameter(\n",
- " name=\"y_gt\",\n",
- " data=sample.answer,\n",
- " eval_input=sample.answer,\n",
- " requires_opt=False,\n",
- " )\n",
- "\n",
- " # pred's full_response is the output of the task pipeline which is GeneratorOutput\n",
- " pred.eval_input = pred.full_response.data\n",
- " return self.loss_fn, {\"kwargs\": {\"y\": pred, \"y_gt\": y_gt}}"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 234.44it/s]\n",
+ "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 2487.72it/s]\n",
+ "\n",
+ "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 1024.88it/s]\n",
+ "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 12018.06it/s]\n",
+ "Training Step: 5: 31%|βββ | 4/13 [00:00<00:00, 11.90it/s]"
+ ]
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "dezwX2yn1eQS"
- },
- "outputs": [],
- "source": [
- "def train(\n",
- " train_batch_size=4, # larger batch size is not that effective, probably because of llm's lost in the middle\n",
- " raw_shots: int = 0,\n",
- " bootstrap_shots: int = 1,\n",
- " max_steps=1,\n",
- " num_workers=4,\n",
- " strategy=\"random\",\n",
- " optimization_order=\"sequential\",\n",
- " debug=False,\n",
- " resume_from_ckpt=None,\n",
- " exclude_input_fields_from_bootstrap_demos=False,\n",
- "):\n",
- " adal_component = ObjectCountAdalComponent(\n",
- " **gpt_3_model,\n",
- " teacher_model_config=gpt_4o_model,\n",
- " text_optimizer_model_config=gpt_4o_model,\n",
- " backward_engine_model_config=gpt_4o_model\n",
- " )\n",
- " print(adal_component)\n",
- " trainer = adal.Trainer(\n",
- " train_batch_size=train_batch_size,\n",
- " adaltask=adal_component,\n",
- " strategy=strategy,\n",
- " max_steps=max_steps,\n",
- " num_workers=num_workers,\n",
- " raw_shots=raw_shots,\n",
- " bootstrap_shots=bootstrap_shots,\n",
- " debug=debug,\n",
- " weighted_sampling=True,\n",
- " optimization_order=optimization_order,\n",
- " exclude_input_fields_from_bootstrap_demos=exclude_input_fields_from_bootstrap_demos,\n",
- " )\n",
- " print(trainer)\n",
- "\n",
- " train_dataset, val_dataset, test_dataset = load_datasets()\n",
- " trainer.fit(\n",
- " train_dataset=train_dataset,\n",
- " val_dataset=val_dataset,\n",
- " test_dataset=test_dataset,\n",
- " debug=debug,\n",
- " resume_from_ckpt=resume_from_ckpt,\n",
- " )\n"
- ]
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Skipping batch 3 as acc: 1.0\n",
+ "No proposal can improve the subset and full set, go to next step\n"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "NGKYozGt60Pp"
- },
- "source": [
- "We use `Sequential` in default, we will end up with 24 steps in total, 12 for text optimizer and 12 for the demo optimizer."
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 133.95it/s]\n",
+ "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 4552.84it/s]\n",
+ "\n",
+ "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 392.05it/s]\n",
+ "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 770.69it/s]\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "yDwLwL0L7Rsw",
- "outputId": "1b7e413b-a1d3-4388-fc0c-ca4b1c072585"
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "WARNING:adalflow.core.generator:Error copying the prompt_kwargs: 'prompt' is not a valid ParameterType\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-3.5-turbo.db\n",
- "ObjectCountAdalComponent(\n",
- " eval_fn: compute_single_item, backward_engine: None, backward_engine_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, teacher_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, text_optimizer_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}\n",
- " (task): ObjectCountTaskPipeline(\n",
- " (llm_counter): Generator(\n",
- " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- " )\n",
- " )\n",
- " (loss_fn): EvalFnToTextLoss()\n",
- ")\n",
- "Trainer(\n",
- " (adaltask): ObjectCountAdalComponent(\n",
- " eval_fn: compute_single_item, backward_engine: None, backward_engine_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, teacher_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, text_optimizer_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}\n",
- " (task): ObjectCountTaskPipeline(\n",
- " (llm_counter): Generator(\n",
- " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- " )\n",
- " )\n",
- " (loss_fn): EvalFnToTextLoss()\n",
- " )\n",
- ")\n",
- "raw_shots: 0, bootstrap_shots: 1\n",
- "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n",
- "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n",
- "Configuring teacher generator for Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- ")\n",
- "Teacher generator set: Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- "), teacher Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- ")\n",
- "Teacher generator configured.\n",
- "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n",
- "Backward engine configured for all generators.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 6482.70it/s]\n",
- "Evaluating step(0): 0.8 across 50 samples, Max potential: 0.8: 100%|ββββββββββ| 50/50 [00:00<00:00, 347.01it/s]\n",
- "Loading Data: 100%|ββββββββββ| 100/100 [00:00<00:00, 2017.67it/s]\n",
- "Evaluating step(0): 0.83 across 100 samples, Max potential: 0.83: 100%|ββββββββββ| 100/100 [00:00<00:00, 286.59it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Initial validation score: 0.8\n",
- "Initial test score: 0.83\n",
- "Checkpoint path: /root/.adalflow/ckpt/ObjectCountAdalComponent\n",
- "save to /root/.adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training Step: 1: 0%| | 0/13 [00:00, ?it/s]\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 133.39it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 1489.32it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 262.46it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 12865.96it/s]\n",
- "Training Step: 2: 8%|β | 1/13 [00:00<00:01, 8.29it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Skipping batch 0 as acc: 1.0\n",
- "No proposal can improve the subset and full set, go to next step\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 384.73it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 927.64it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 754.71it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 12087.33it/s]\n",
- "Training Step: 3: 15%|ββ | 2/13 [00:00<00:01, 8.92it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Skipping batch 1 as acc: 1.0\n",
- "No proposal can improve the subset and full set, go to next step\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 193.44it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 2761.68it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 810.38it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 11320.66it/s]\n",
- "Training Step: 4: 15%|ββ | 2/13 [00:00<00:01, 8.92it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Skipping batch 2 as acc: 1.0\n",
- "No proposal can improve the subset and full set, go to next step\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 234.44it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 2487.72it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 1024.88it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 12018.06it/s]\n",
- "Training Step: 5: 31%|βββ | 4/13 [00:00<00:00, 11.90it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Skipping batch 3 as acc: 1.0\n",
- "No proposal can improve the subset and full set, go to next step\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 133.95it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 4552.84it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 392.05it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 770.69it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Moving batch correct size: 3\n",
- "Moving batch error size: 1\n",
- "Moving batch acc: 0.75\n",
- "Moving batch correct size: 3\n",
- "Moving batch error size: 1\n",
- "Subset Error size: 1\n",
- "Subset Correct size: 2\n",
- "Subset score: 0.6666666666666666\n",
- "Subset batch acc: 0.6666666666666666\n",
- "Subset loss backward...\n",
- "setting pred name Generator_outputy_pred_1 score to 1.0\n",
- "setting pred name Generator_outputy_pred_2 score to 1.0\n",
- "setting pred name Generator_outputy_pred_3 score to 0.0\n",
- "Subset loss backward time: 5.383355617523193\n",
- "Optimizer propose...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 225.14it/s]\n",
- "Evaluating step(4): 1.0 across 3 samples, Max potential: 1.0: 100%|ββββββββββ| 3/3 [00:01<00:00, 2.43it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Pass subset check: 1.0 > 0.6666666666666666\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 445.28it/s]\n",
- "Evaluating step(4): 1.0 across 4 samples, Max potential: 1.0: 100%|ββββββββββ| 4/4 [00:01<00:00, 2.67it/s]\n",
- "Proposing: 0%| | 0/5 [00:03, ?it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Pass full check: 1.0 >= 0.75\n",
- "Done with proposals\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 1139.66it/s]\n",
- "Evaluating step(5): 0.84 across 50 samples, Max potential: 0.84: 100%|ββββββββββ| 50/50 [00:16<00:00, 3.04it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Optimizer step: 0.84 > 0.8\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 100/100 [00:00<00:00, 1658.72it/s]\n",
- "Evaluating step(4): 0.91 across 100 samples, Max potential: 0.91: 100%|ββββββββββ| 100/100 [00:29<00:00, 3.37it/s]\n",
- "Training Step: 6: 38%|ββββ | 5/13 [00:56<02:18, 17.27s/it]\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 207.97it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 3.86it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 494.99it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 805.09it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Moving batch correct size: 3\n",
- "Moving batch error size: 1\n",
- "Moving batch acc: 0.75\n",
- "Moving batch correct size: 3\n",
- "Moving batch error size: 1\n",
- "Subset Error size: 1\n",
- "Subset Correct size: 2\n",
- "Subset score: 0.6666666666666666\n",
- "Subset batch acc: 0.6666666666666666\n",
- "Subset loss backward...\n",
- "setting pred name Generator_outputy_pred_3 score to 1.0\n",
- "setting pred name Generator_outputy_pred_1 score to 0.0\n",
- "setting pred name Generator_outputy_pred_0 score to 1.0\n",
- "Subset loss backward time: 4.081957817077637\n",
- "Optimizer propose...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 538.35it/s]\n",
- "Evaluating step(5): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:00<00:00, 3.13it/s]\n",
- "\n",
- "Proposing: 20%|ββ | 1/5 [00:02<00:08, 2.13s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 151.18it/s]\n",
- "Evaluating step(5): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:00<00:00, 204.61it/s]\n",
- "\n",
- "Proposing: 40%|ββββ | 2/5 [00:03<00:04, 1.66s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 698.62it/s]\n",
- "Evaluating step(5): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:00<00:00, 571.41it/s]\n",
- "\n",
- "Proposing: 60%|ββββββ | 3/5 [00:05<00:03, 1.61s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, especially when quantities are specified. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 116.83it/s]\n",
- "Evaluating step(5): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:01<00:00, 2.50it/s]\n",
- "\n",
- "Proposing: 80%|ββββββββ | 4/5 [00:07<00:01, 1.88s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 399.65it/s]\n",
- "Evaluating step(5): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:00<00:00, 571.09it/s]\n",
- "\n",
- "Proposing: 100%|ββββββββββ| 5/5 [00:08<00:00, 1.69s/it]\n",
- "Training Step: 7: 46%|βββββ | 6/13 [01:09<01:53, 16.18s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "Done with proposals\n",
- "No proposal can improve the subset and full set, go to next step\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 59.06it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:02<00:00, 1.63it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 410.78it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 4694.24it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Moving batch correct size: 7\n",
- "Moving batch error size: 1\n",
- "Moving batch acc: 0.875\n",
- "Moving batch correct size: 7\n",
- "Moving batch error size: 1\n",
- "Subset Error size: 1\n",
- "Subset Correct size: 2\n",
- "Subset score: 0.6666666666666666\n",
- "Subset batch acc: 0.6666666666666666\n",
- "Subset loss backward...\n",
- "setting pred name Generator_outputy_pred_2 score to 1.0\n",
- "setting pred name Generator_outputy_pred_1 score to 1.0\n",
- "Subset loss backward time: 3.0843119621276855\n",
- "Optimizer propose...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item individually and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 154.50it/s]\n",
- "Evaluating step(6): 1.0 across 3 samples, Max potential: 1.0: 100%|ββββββββββ| 3/3 [00:01<00:00, 1.52it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Pass subset check: 1.0 > 0.6666666666666666\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 8/8 [00:00<00:00, 279.47it/s]\n",
- "Evaluating step(6): 0.875 across 8 samples, Max potential: 0.875: 100%|ββββββββββ| 8/8 [00:01<00:00, 4.43it/s]\n",
- "Proposing: 0%| | 0/5 [00:04, ?it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Pass full check: 0.875 >= 0.875\n",
- "Done with proposals\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 2336.58it/s]\n",
- "Evaluating step(7): 0.84 across 50 samples, Max potential: 0.84: 100%|ββββββββββ| 50/50 [00:17<00:00, 2.88it/s]\n",
- "Training Step: 8: 54%|ββββββ | 7/13 [01:37<01:58, 19.81s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Optimizer revert: 0.84 <= 0.84\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 148.75it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 2.04it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 345.11it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 7550.50it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Moving batch correct size: 11\n",
- "Moving batch error size: 1\n",
- "Moving batch acc: 0.9166666666666666\n",
- "Moving batch correct size: 11\n",
- "Moving batch error size: 1\n",
- "Subset Error size: 1\n",
- "Subset Correct size: 2\n",
- "Subset score: 0.6666666666666666\n",
- "Subset batch acc: 0.6666666666666666\n",
- "Subset loss backward...\n",
- "setting pred name Generator_outputy_pred_2 score to 1.0\n",
- "Subset loss backward time: 2.337067127227783\n",
- "Optimizer propose...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item individually, especially when quantities are specified. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 193.84it/s]\n",
- "Evaluating step(7): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:01<00:00, 2.16it/s]\n",
- "\n",
- "Proposing: 20%|ββ | 1/5 [00:02<00:09, 2.39s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item individually, especially when quantities are specified in words. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 147.89it/s]\n",
- "Evaluating step(7): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:01<00:00, 2.04it/s]\n",
- "\n",
- "Proposing: 40%|ββββ | 2/5 [00:04<00:07, 2.41s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item individually, especially when quantities are specified. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 423.61it/s]\n",
- "Evaluating step(7): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:00<00:00, 556.86it/s]\n",
- "\n",
- "Proposing: 60%|ββββββ | 3/5 [00:05<00:03, 1.78s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item individually, especially when quantities are specified. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 532.41it/s]\n",
- "Evaluating step(7): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:00<00:00, 522.78it/s]\n",
- "\n",
- "Proposing: 80%|ββββββββ | 4/5 [00:06<00:01, 1.44s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item individually, especially when quantities are specified in words. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 284.18it/s]\n",
- "Evaluating step(7): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:00<00:00, 160.35it/s]\n",
- "\n",
- "Proposing: 100%|ββββββββββ| 5/5 [00:07<00:00, 1.59s/it]\n",
- "Training Step: 9: 62%|βββββββ | 8/13 [01:50<01:27, 17.55s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "Done with proposals\n",
- "No proposal can improve the subset and full set, go to next step\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 87.73it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 3.62it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 342.85it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 7157.52it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Moving batch correct size: 14\n",
- "Moving batch error size: 2\n",
- "Moving batch acc: 0.875\n",
- "Moving batch correct size: 14\n",
- "Moving batch error size: 2\n",
- "Subset Error size: 2\n",
- "Subset Correct size: 4\n",
- "Subset score: 0.6666666666666666\n",
- "Subset batch acc: 0.6666666666666666\n",
- "Subset loss backward...\n",
- "setting pred name Generator_outputy_pred_0 score to 0.0\n",
- "setting pred name Generator_outputy_pred_0 score to 1.0\n",
- "setting pred name Generator_outputy_pred_3 score to 1.0\n",
- "setting pred name Generator_outputy_pred_3 score to 1.0\n",
- "setting pred name Generator_outputy_pred_0 score to 1.0\n",
- "Subset loss backward time: 7.823317050933838\n",
- "Optimizer propose...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 6/6 [00:00<00:00, 166.50it/s]\n",
- "Evaluating step(8): 0.8333 across 6 samples, Max potential: 0.8333: 100%|ββββββββββ| 6/6 [00:02<00:00, 2.78it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Pass subset check: 0.8333333333333334 > 0.6666666666666666\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 16/16 [00:00<00:00, 481.75it/s]\n",
- "Evaluating step(8): 0.875 across 16 samples, Max potential: 0.875: 100%|ββββββββββ| 16/16 [00:03<00:00, 5.21it/s]\n",
- "Proposing: 0%| | 0/5 [00:06, ?it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Pass full check: 0.875 >= 0.875\n",
- "Done with proposals\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 1112.82it/s]\n",
- "Evaluating step(9): 0.86 across 50 samples, Max potential: 0.86: 100%|ββββββββββ| 50/50 [00:16<00:00, 2.97it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Optimizer step: 0.86 > 0.84\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 100/100 [00:00<00:00, 2395.58it/s]\n",
- "Evaluating step(8): 0.87 across 100 samples, Max potential: 0.87: 100%|ββββββββββ| 100/100 [00:30<00:00, 3.30it/s]\n",
- "Training Step: 10: 69%|βββββββ | 9/13 [02:52<02:04, 31.23s/it]\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 212.83it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 2.04it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 655.18it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 1241.84it/s]\n",
- "Training Step: 11: 77%|ββββββββ | 10/13 [02:55<01:07, 22.43s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Skipping batch 9 as acc: 1.0\n",
- "No proposal can improve the subset and full set, go to next step\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 93.95it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 3.23it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 757.71it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 1320.62it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Moving batch correct size: 3\n",
- "Moving batch error size: 1\n",
- "Moving batch acc: 0.75\n",
- "Moving batch correct size: 3\n",
- "Moving batch error size: 1\n",
- "Subset Error size: 1\n",
- "Subset Correct size: 2\n",
- "Subset score: 0.6666666666666666\n",
- "Subset batch acc: 0.6666666666666666\n",
- "Subset loss backward...\n",
- "setting pred name Generator_outputy_pred_0 score to 1.0\n",
- "setting pred name Generator_outputy_pred_2 score to 0.0\n",
- "setting pred name Generator_outputy_pred_3 score to 1.0\n",
- "Subset loss backward time: 3.768970012664795\n",
- "Optimizer propose...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, ensuring you categorize them correctly. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 125.10it/s]\n",
- "Evaluating step(10): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:01<00:00, 2.77it/s]\n",
- "\n",
- "Proposing: 20%|ββ | 1/5 [00:02<00:08, 2.19s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, ensuring you categorize them correctly. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 571.28it/s]\n",
- "Evaluating step(10): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:00<00:00, 429.07it/s]\n",
- "\n",
- "Proposing: 40%|ββββ | 2/5 [00:03<00:04, 1.58s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to categories and quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 111.64it/s]\n",
- "Evaluating step(10): 1.0 across 3 samples, Max potential: 1.0: 100%|ββββββββββ| 3/3 [00:01<00:00, 2.63it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Pass subset check: 1.0 > 0.6666666666666666\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 455.77it/s]\n",
- "Evaluating step(10): 1.0 across 4 samples, Max potential: 1.0: 100%|ββββββββββ| 4/4 [00:00<00:00, 5.14it/s]\n",
- "Proposing: 40%|ββββ | 2/5 [00:06<00:09, 3.17s/it]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Pass full check: 1.0 >= 0.75\n",
- "Done with proposals\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 1732.93it/s]\n",
- "Evaluating step(11): 0.825 across 40 samples, Max potential: 0.86: 80%|ββββββββ | 40/50 [00:18<00:04, 2.21it/s]\n",
- "Training Step: 12: 85%|βββββββββ | 11/13 [03:24<00:49, 24.61s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Optimizer revert: 0.8048780487804879 <= 0.86\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 128.86it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 2.24it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 470.20it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 2608.40it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Moving batch correct size: 6\n",
- "Moving batch error size: 2\n",
- "Moving batch acc: 0.75\n",
- "Moving batch correct size: 6\n",
- "Moving batch error size: 2\n",
- "Subset Error size: 2\n",
- "Subset Correct size: 4\n",
- "Subset score: 0.6666666666666666\n",
- "Subset batch acc: 0.6666666666666666\n",
- "Subset loss backward...\n",
- "setting pred name Generator_outputy_pred_3 score to 1.0\n",
- "setting pred name Generator_outputy_pred_2 score to 1.0\n",
- "setting pred name Generator_outputy_pred_1 score to 0.0\n",
- "setting pred name Generator_outputy_pred_1 score to 1.0\n",
- "Subset loss backward time: 6.722561836242676\n",
- "Optimizer propose...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, ensuring you categorize them correctly. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 6/6 [00:00<00:00, 265.78it/s]\n",
- "Evaluating step(11): 0.5 across 6 samples, Max potential: 0.5: 100%|ββββββββββ| 6/6 [00:01<00:00, 3.58it/s]\n",
- "\n",
- "Proposing: 20%|ββ | 1/5 [00:02<00:10, 2.65s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, ensuring you categorize them correctly. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 6/6 [00:00<00:00, 396.33it/s]\n",
- "Evaluating step(11): 0.5 across 6 samples, Max potential: 0.5: 100%|ββββββββββ| 6/6 [00:00<00:00, 354.51it/s]\n",
- "\n",
- "Proposing: 40%|ββββ | 2/5 [00:03<00:05, 1.80s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, ensuring you categorize them correctly. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 6/6 [00:00<00:00, 789.39it/s]\n",
- "Evaluating step(11): 0.5 across 6 samples, Max potential: 0.5: 100%|ββββββββββ| 6/6 [00:00<00:00, 233.79it/s]\n",
- "\n",
- "Proposing: 60%|ββββββ | 3/5 [00:04<00:02, 1.49s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each relevant item, excluding any that do not fit the category. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 6/6 [00:00<00:00, 181.12it/s]\n",
- "Evaluating step(11): 0.6667 across 6 samples, Max potential: 0.6667: 100%|ββββββββββ| 6/6 [00:02<00:00, 2.13it/s]\n",
- "\n",
- "Proposing: 80%|ββββββββ | 4/5 [00:08<00:02, 2.44s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each relevant item, excluding any that do not fit the category. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 6/6 [00:00<00:00, 807.04it/s]\n",
- "Evaluating step(11): 0.6667 across 6 samples, Max potential: 0.6667: 100%|ββββββββββ| 6/6 [00:00<00:00, 275.78it/s]\n",
- "\n",
- "Proposing: 100%|ββββββββββ| 5/5 [00:10<00:00, 2.01s/it]\n",
- "Training Step: 12: 92%|ββββββββββ| 12/13 [03:43<00:18, 18.61s/it]\n",
- "Epoch: 100%|ββββββββββ| 1/1 [03:43<00:00, 223.37s/it]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "Done with proposals\n",
- "No proposal can improve the subset and full set, go to next step\n",
- "Reached max steps\n",
- "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n",
- "Configuring teacher generator for Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- ")\n",
- "Teacher generator set: Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- "), teacher Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- ")\n",
- "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n",
- "Configuring teacher generator for Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- ")\n",
- "Teacher generator set: Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- "), teacher Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- ")\n",
- "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n",
- "Configuring teacher generator for Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " You are the feedback engine in an optimization system.\n",
- " \n",
- " Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags.\n",
- " 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable.\n",
- " 2. Feedback examples: \"Since language models have the X failure mode...\", \"Adding X can fix this error because...\", \"Removing X can improve the objective function because...\", \"Changing X to Y would fix the mistake...\"\n",
- " 3. Consider the variable in the context of its peers if provided.\n",
- " Remember:\n",
- " Be concise, critical, and direct.\n",
- " \n",
- " \n",
- " {{conversation_sec}}\n",
- " \n",
- " {{objective_instruction_sec}}\n",
- " , prompt_variables: ['objective_instruction_sec', 'conversation_sec']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- ")\n",
- "Teacher generator set: Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " You are the feedback engine in an optimization system.\n",
- " \n",
- " Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags.\n",
- " 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable.\n",
- " 2. Feedback examples: \"Since language models have the X failure mode...\", \"Adding X can fix this error because...\", \"Removing X can improve the objective function because...\", \"Changing X to Y would fix the mistake...\"\n",
- " 3. Consider the variable in the context of its peers if provided.\n",
- " Remember:\n",
- " Be concise, critical, and direct.\n",
- " \n",
- " \n",
- " {{conversation_sec}}\n",
- " \n",
- " {{objective_instruction_sec}}\n",
- " , prompt_variables: ['objective_instruction_sec', 'conversation_sec']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- "), teacher Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " You are the feedback engine in an optimization system.\n",
- " \n",
- " Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags.\n",
- " 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable.\n",
- " 2. Feedback examples: \"Since language models have the X failure mode...\", \"Adding X can fix this error because...\", \"Removing X can improve the objective function because...\", \"Changing X to Y would fix the mistake...\"\n",
- " 3. Consider the variable in the context of its peers if provided.\n",
- " Remember:\n",
- " Be concise, critical, and direct.\n",
- " \n",
- " \n",
- " {{conversation_sec}}\n",
- " \n",
- " {{objective_instruction_sec}}\n",
- " , prompt_variables: ['objective_instruction_sec', 'conversation_sec']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- ")\n",
- "Teacher generator configured.\n",
- "save to /root/.adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n",
- "Starting step: 12\n",
- "trainer_results: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training Step: 13: 0%| | 0/12 [00:00, ?it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Training Step: 13\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 158.10it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 6c34d6e5-0e3d-4243-834e-fd6c5883f467 already exists. Updating the trace.Trace with id 234e39df-1bc4-41df-a515-895cb2614a53 already exists. Updating the trace.\n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 3.35it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 8895d6bd-eab0-48af-ad4b-51f8007258b1 already exists. Updating the trace.\n",
- "Trace with id c42fea48-1b90-4388-92c4-b65b4356a3a2 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 490.46it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 1656.19it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 247.40it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 2.77it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 365.97it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 9294.86it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sampled_augmented_demos: ['c42fea48-1b90-4388-92c4-b65b4356a3a2']\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Trombone: 1\\n\\n 2. Violin: 1\\n\\n 3. Clarinet: 1\\n\\n 4. Accordion: 1\\n\\n 5. Flutes: 4\\n\\n 6. Trumpet: 1\\n\\n 7. Drums: 2\\n\\n 8. Piano: 1\\n\\n\\n Now, let''s add them up:\\n\\n\\n 1 + 1 + 1 + 1 + 4 + 1 + 2 + 1 = 12\\n\\n\\n Answer: 12'\", requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 2578.13it/s]\n",
- "Evaluating step(13): 0.7812 across 32 samples, Max potential: 0.86: 64%|βββββββ | 32/50 [00:23<00:13, 1.35it/s]\n",
- "Training Step: 14: 17%|ββ | 2/12 [00:27<04:35, 27.54s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail validation: 0.7575757575757576 <= 0.86, revert\n",
- "Training Step: 14\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 136.94it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id fd34672a-ffd1-498e-a88f-283aa9d4f65d already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 3.92it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 46a8994f-fce6-4031-b251-1c8af31d88d2 already exists. Updating the trace.\n",
- "Trace with id 2bc992c0-9832-47f1-87c3-9f6e4b18ee99 already exists. Updating the trace.\n",
- "Trace with id 12a6ff3d-f54d-4d89-b5f0-1aec30e96398 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 443.10it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 3302.60it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 114.14it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 2.59it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 685.93it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 5111.89it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them up:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 1784.60it/s]\n",
- "Evaluating step(14): 0.7812 across 32 samples, Max potential: 0.86: 64%|βββββββ | 32/50 [00:19<00:11, 1.61it/s]\n",
- "Training Step: 15: 33%|ββββ | 4/12 [00:52<02:10, 16.36s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail validation: 0.7575757575757576 <= 0.86, revert\n",
- "Training Step: 15\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 164.67it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 660c5004-35d2-4a6d-9a06-1e0b3f032f21 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 25%|βββ | 1/4 [00:00<00:02, 1.12it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id d3f33ded-170a-4b87-9b0b-987d5fb7b817 already exists. Updating the trace.\n",
- "Trace with id de4e75d6-a21b-4004-925d-a9a818bd0f7c already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 3.02it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 1f682cab-026c-4803-8018-a45d027aa026 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 665.05it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 1875.18it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 160.86it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 2.14it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 621.42it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 9054.08it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them up:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 2982.93it/s]\n",
- "Evaluating step(15): 0.7812 across 32 samples, Max potential: 0.86: 64%|βββββββ | 32/50 [00:00<00:00, 495.97it/s]\n",
- "Training Step: 16: 42%|βββββ | 5/12 [00:56<01:03, 9.03s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail validation: 0.7575757575757576 <= 0.86, revert\n",
- "Training Step: 16\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 127.68it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 0e8910c8-703d-4766-a483-c5691125fd03 already exists. Updating the trace.Trace with id ffe67a7b-7b81-4302-b6ed-4b506570274b already exists. Updating the trace.\n",
- "Trace with id e250f80e-334e-4f85-ac1f-df9a2013d578 already exists. Updating the trace.\n",
- "Trace with id fe9b883c-4f47-44f7-a388-b03a2fb10413 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 534.68it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 201.71it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 10453.09it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 195.85it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 3.52it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 560.49it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 1250.72it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them up:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 3902.04it/s]\n",
- "Evaluating step(16): 0.7812 across 32 samples, Max potential: 0.86: 64%|βββββββ | 32/50 [00:00<00:00, 398.91it/s]\n",
- "Training Step: 17: 58%|ββββββ | 7/12 [00:58<00:35, 7.16s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail validation: 0.7575757575757576 <= 0.86, revert\n",
- "Training Step: 17\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 0%| | 0/4 [00:00, ?it/s]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id d46e538c-832d-4eb5-ba9b-a308f666baba already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\rLoading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 106.99it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id a9a202f5-e723-4d24-ae5e-ad1084a52ef8 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 75%|ββββββββ | 3/4 [00:00<00:00, 3.09it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 74d1bc97-46cd-406d-8c3a-2f999aae1b2f already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 2.92it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 334.77it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 874.86it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 370.55it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 2.81it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 482.84it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 645.40it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them up:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 2888.08it/s]\n",
- "Evaluating step(17): 0.7812 across 32 samples, Max potential: 0.86: 64%|βββββββ | 32/50 [00:00<00:00, 221.76it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail validation: 0.7575757575757576 <= 0.86, revert\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Training Step: 18: 67%|βββββββ | 8/12 [01:02<00:19, 4.87s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Training Step: 18\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 111.28it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id d4194dd1-739a-4509-8ac8-7c3f89649ee7 already exists. Updating the trace.Trace with id 1eb770ed-ff6f-481e-8c16-b9749a44a1a6 already exists. Updating the trace.\n",
- "Trace with id 7694df14-3a24-40bd-a3fa-036c2645eca3 already exists. Updating the trace.\n",
- "\n",
- "Trace with id 4cd9f4ec-2648-4e85-8e17-3dae1b8558d3 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 585.96it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 225.18it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 1038.07it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 250.95it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 2.18it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 438.82it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 2456.40it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them up:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 2422.27it/s]\n",
- "Evaluating step(18): 0.7812 across 32 samples, Max potential: 0.86: 64%|βββββββ | 32/50 [00:00<00:00, 456.47it/s]\n",
- "Training Step: 19: 75%|ββββββββ | 9/12 [01:05<00:13, 4.41s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail validation: 0.7575757575757576 <= 0.86, revert\n",
- "Training Step: 19\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 57.52it/s]\n",
- "Training: 0%| | 0/4 [00:00, ?it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 5124e2e6-2aac-4dd3-ab63-9277a7b806a7 already exists. Updating the trace.\n",
- "Trace with id 1d3eceeb-ad24-40f6-8752-2f38241172cb already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 3.16it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 3a9a47c8-a210-43a4-8d24-b9159babb6e4 already exists. Updating the trace.Trace with id 6c0d3a9a-bb01-4fb3-a68b-1edf66861235 already exists. Updating the trace.\n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 193.38it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 6143.25it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 107.12it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 2.42it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 375.70it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 10505.46it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them up:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 3644.75it/s]\n",
- "Evaluating step(19): 0.7812 across 32 samples, Max potential: 0.86: 64%|βββββββ | 32/50 [00:00<00:00, 275.17it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail validation: 0.7575757575757576 <= 0.86, revert\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Training Step: 20: 92%|ββββββββββ| 11/12 [01:09<00:04, 4.32s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Training Step: 20\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 125.16it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id b538075d-01af-4b76-b835-9005f3044609 already exists. Updating the trace.\n",
- "Trace with id dd9d8748-4926-4bcd-902d-6a4c5cb38267 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\rTraining: 0%| | 0/4 [00:00, ?it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 60866bed-8020-4610-a39a-a4a730c035db already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 4.20it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 85d63f78-39c0-4753-a9fc-52202df48673 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 328.35it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 999.36it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 239.24it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:02<00:00, 1.87it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 353.26it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 391.07it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sampled_augmented_demos: ['60866bed-8020-4610-a39a-a4a730c035db']\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Bed: 1\\n\\n 2. Fridge: 1\\n\\n 3. Lamp: 1\\n\\n 4. Toaster: 1\\n\\n 5. Chairs: 4\\n\\n 6. Table: 1\\n\\n\\n Now, add them all together:\\n\\n\\n 1 + 1 + 1 + 1 + 4 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 1763.23it/s]\n",
- "Evaluating step(20): 0.7083 across 24 samples, Max potential: 0.86: 48%|βββββ | 24/50 [00:17<00:18, 1.38it/s]\n",
- "Training Step: 21: 100%|ββββββββββ| 12/12 [01:34<00:00, 7.82s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail validation: 0.68 <= 0.86, revert\n",
- "Training Step: 21\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 208.10it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id aefd17e5-9682-4420-a820-c484a63d6dcd already exists. Updating the trace.\n",
- "Trace with id 04e77795-cc9b-4530-a883-5f775e3fbc76 already exists. Updating the trace.\n",
- "Trace with id 701be0ee-29e0-42f5-be04-72d2b73e3968 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 4.56it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 433650a5-ca75-4867-b235-3af4a7c55c67 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 187.26it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 2595.49it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 129.91it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 2.86it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 172.30it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 689.23it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them up:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 3183.48it/s]\n",
- "Evaluating step(21): 0.7812 across 32 samples, Max potential: 0.86: 64%|βββββββ | 32/50 [00:00<00:00, 438.75it/s]\n",
- "Training Step: 22: : 13it [01:38, 6.76s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail validation: 0.7575757575757576 <= 0.86, revert\n",
- "Training Step: 22\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 0%| | 0/4 [00:00, ?it/s]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 4dad0f65-d624-48c2-a795-596c00b0535a already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 86.81it/s]\n",
- "Training: 0%| | 0/4 [00:00, ?it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 1b4b3ab0-d20f-4fc2-a09c-4592a227a8e5 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 3.23it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 82cf82ff-d826-4bb1-847c-9938aeec8ff5 already exists. Updating the trace.\n",
- "Trace with id ac43f3d4-d67d-4912-95d6-0baa09b52d9a already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 143.58it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 842.95it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 133.83it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 2.36it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 326.14it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 307.38it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sampled_augmented_demos: ['60866bed-8020-4610-a39a-a4a730c035db']\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Bed: 1\\n\\n 2. Fridge: 1\\n\\n 3. Lamp: 1\\n\\n 4. Toaster: 1\\n\\n 5. Chairs: 4\\n\\n 6. Table: 1\\n\\n\\n Now, add them all together:\\n\\n\\n 1 + 1 + 1 + 1 + 4 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 5440.79it/s]\n",
- "Evaluating step(22): 0.7083 across 24 samples, Max potential: 0.86: 48%|βββββ | 24/50 [00:00<00:00, 303.26it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail validation: 0.68 <= 0.86, revert\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Training Step: 23: : 14it [01:42, 6.13s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Training Step: 23\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 91.93it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id daa5804f-1aad-4f01-b26c-6b31c57f065f already exists. Updating the trace.\n",
- "Trace with id e2bfbbe0-fb79-4df5-9a7d-50c9085947bc already exists. Updating the trace.\n",
- "Trace with id 71d549d2-9cc8-46ba-a7f6-d07f69263fd3 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 3.56it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 840d9ed5-8222-45a9-a406-7445feae9733 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 63.89it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 201.47it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 90.61it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:02<00:00, 1.96it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 287.69it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 1938.89it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them up:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 3010.90it/s]\n",
- "Evaluating step(23): 0.7812 across 32 samples, Max potential: 0.86: 64%|βββββββ | 32/50 [00:00<00:00, 423.98it/s]\n",
- "Training Step: 24: : 16it [01:48, 6.22s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail validation: 0.7575757575757576 <= 0.86, revert\n",
- "Training Step: 24\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 122.52it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 96c716a1-e984-4fe3-9ce0-e156ac709edb already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\rTraining: 0%| | 0/4 [00:00, ?it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 000a3738-1f09-40b0-9f8b-2dec63a3f7f8 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 3.21it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id d71ad721-d21d-42f1-af9b-719ff026406b already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 106.06it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 1513.37it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 265.42it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 2.07it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 171.27it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 862.32it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sampled_augmented_demos: ['840d9ed5-8222-45a9-a406-7445feae9733']\n",
- "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item, paying special attention to quantities mentioned. Verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Table: 1\\n\\n 3. Fridge: 1\\n\\n 4. Stove: 1\\n\\n 5. Oven: 1\\n\\n 6. Toaster: 1\\n\\n 7. Couch: 1\\n\\n 8. Cars: 4\\n\\n\\n Now, add them up:\\n\\n\\n 1 + 1 + 1 + 1 + 1 + 1 + 1 + 4 = 11\\n\\n\\n Answer: 11'\", requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 1210.01it/s]\n",
- "Evaluating step(24): 0.86 across 50 samples, Max potential: 0.86: 100%|ββββββββββ| 50/50 [00:18<00:00, 2.69it/s]\n",
- "Training Step: 24: 100%|ββββββββββ| 12/12 [02:15<00:00, 11.26s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail validation: 0.86 <= 0.86, revert\n",
- "Saved ckpt to /root/.adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n",
- "Training time: 359.32386112213135s\n",
- "ckpt_file: /root/.adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n"
- ]
- }
- ],
- "source": [
- "train(debug=False, max_steps=12, strategy=\"constrained\",\n",
- " raw_shots=0, bootstrap_shots=1,\n",
- " exclude_input_fields_from_bootstrap_demos=True\n",
- " )"
- ]
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Moving batch correct size: 3\n",
+ "Moving batch error size: 1\n",
+ "Moving batch acc: 0.75\n",
+ "Moving batch correct size: 3\n",
+ "Moving batch error size: 1\n",
+ "Subset Error size: 1\n",
+ "Subset Correct size: 2\n",
+ "Subset score: 0.6666666666666666\n",
+ "Subset batch acc: 0.6666666666666666\n",
+ "Subset loss backward...\n",
+ "setting pred name Generator_outputy_pred_1 score to 1.0\n",
+ "setting pred name Generator_outputy_pred_2 score to 1.0\n",
+ "setting pred name Generator_outputy_pred_3 score to 0.0\n",
+ "Subset loss backward time: 5.383355617523193\n",
+ "Optimizer propose...\n"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "KAyFhzrG_J4l"
- },
- "source": [
- "Here is our scores for each step:\n",
- "\n",
- "\"val_scores\": [\n",
- " 0.8,\n",
- " 0.8,\n",
- " 0.8,\n",
- " 0.8,\n",
- " 0.8,\n",
- " 0.84,\n",
- " 0.84,\n",
- " 0.84,\n",
- " 0.84,\n",
- " 0.86,\n",
- " 0.86,\n",
- " 0.86,\n",
- " 0.86,\n",
- " 0.86,\n",
- " 0.86,\n",
- " 0.86,\n",
- " 0.86,\n",
- " 0.86,\n",
- " 0.86,\n",
- " 0.86,\n",
- " 0.86,\n",
- " 0.86,\n",
- " 0.86,\n",
- " 0.86,\n",
- " 0.86\n",
- " ]\n",
- "\n",
- " \"test_scores\": [\n",
- " 0.83,\n",
- " 0.83,\n",
- " 0.83,\n",
- " 0.83,\n",
- " 0.83,\n",
- " 0.91,\n",
- " 0.91,\n",
- " 0.91,\n",
- " 0.91,\n",
- " 0.87,\n",
- " 0.87,\n",
- " 0.87,\n",
- " 0.87,\n",
- " 0.87,\n",
- " 0.87,\n",
- " 0.87,\n",
- " 0.87,\n",
- " 0.87,\n",
- " 0.87,\n",
- " 0.87,\n",
- " 0.87,\n",
- " 0.87,\n",
- " 0.87,\n",
- " 0.87,\n",
- " 0.87\n",
- " ]\n",
- "\n",
- "\n",
- "It is normal when the score of the validation does not exactly match to that of the test set. You can also train with just the test set. You can modify the fit arguments as\n",
- "\n",
- "```\n",
- "trainer.fit(\n",
- " train_dataset=train_dataset,\n",
- " val_dataset=test_dataset,\n",
- " # test_dataset=test_dataset,\n",
- " debug=debug,\n",
- " resume_from_ckpt=resume_from_ckpt,\n",
- " )\n",
- "```"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "953BV81y0JFv"
- },
- "source": [
- "# π₯ Resume Checkpoint\n",
- "\n",
- "We might want to continue from the earlier step and to train more steps\n",
- "\n",
- "This is easy to do.\n",
- "\n",
- "**Note: Ensure you copy the path you had, and replace it, as your run might create a different file name.**"
- ]
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "New prompts: [PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='2f948c14-7f8f-4f46-9e23-d30598d3f47b', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "kde1V1AE7Ty0",
- "outputId": "52d69b69-0a3a-4780-ca26-25956cc023c7"
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "WARNING:adalflow.core.generator:Error copying the prompt_kwargs: 'prompt' is not a valid ParameterType\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-3.5-turbo.db\n",
- "ObjectCountAdalComponent(\n",
- " eval_fn: compute_single_item, backward_engine: None, backward_engine_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, teacher_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, text_optimizer_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}\n",
- " (task): ObjectCountTaskPipeline(\n",
- " (llm_counter): Generator(\n",
- " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- " )\n",
- " )\n",
- " (loss_fn): EvalFnToTextLoss()\n",
- ")\n",
- "Trainer(\n",
- " (adaltask): ObjectCountAdalComponent(\n",
- " eval_fn: compute_single_item, backward_engine: None, backward_engine_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, teacher_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, text_optimizer_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}\n",
- " (task): ObjectCountTaskPipeline(\n",
- " (llm_counter): Generator(\n",
- " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- " )\n",
- " )\n",
- " (loss_fn): EvalFnToTextLoss()\n",
- " )\n",
- ")\n",
- "raw_shots: 0, bootstrap_shots: 1\n",
- "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n",
- "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n",
- "Configuring teacher generator for Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- ")\n",
- "Teacher generator set: Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- "), teacher Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- ")\n",
- "Teacher generator configured.\n",
- "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n",
- "Backward engine configured for all generators.\n",
- "Restoring prompts: PromptData(id='44f6083f-4cf7-4a9a-bf10-20d218ee4106', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True)\n",
- "save to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training Step: 27: 0%| | 0/13 [00:00, ?it/s]\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 417.64it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 1073.40it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 571.14it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 1126.21it/s]\n",
- "Training Step: 28: 0%| | 0/13 [00:00, ?it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Skipping batch 0 as acc: 1.0\n",
- "No proposal can improve the subset and full set, go to next step\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 604.56it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:02<00:00, 1.83it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 540.00it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 1445.81it/s]\n",
- "Training Step: 29: 15%|ββ | 2/13 [00:02<00:12, 1.15s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Skipping batch 1 as acc: 1.0\n",
- "No proposal can improve the subset and full set, go to next step\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 318.87it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 4.06it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 458.88it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 1186.26it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Moving batch correct size: 3\n",
- "Moving batch error size: 1\n",
- "Moving batch acc: 0.75\n",
- "Moving batch correct size: 3\n",
- "Moving batch error size: 1\n",
- "Subset Error size: 1\n",
- "Subset Correct size: 2\n",
- "Subset score: 0.6666666666666666\n",
- "Subset batch acc: 0.6666666666666666\n",
- "Subset loss backward...\n",
- "setting pred name Generator_outputy_pred_1 score to 1.0\n",
- "setting pred name Generator_outputy_pred_2 score to 0.0\n",
- "setting pred name Generator_outputy_pred_0 score to 1.0\n",
- "Subset loss backward time: 4.518843650817871\n",
- "Optimizer propose...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 142.52it/s]\n",
- "Evaluating step(2): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:01<00:00, 1.56it/s]\n",
- "\n",
- "Proposing: 20%|ββ | 1/5 [00:02<00:11, 2.99s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities specified in the input. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 118.95it/s]\n",
- "Evaluating step(2): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:01<00:00, 1.76it/s]\n",
- "\n",
- "Proposing: 40%|ββββ | 2/5 [00:05<00:08, 2.85s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 129.26it/s]\n",
- "Evaluating step(2): 1.0 across 3 samples, Max potential: 1.0: 100%|ββββββββββ| 3/3 [00:01<00:00, 1.54it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Pass subset check: 1.0 > 0.6666666666666666\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 263.51it/s]\n",
- "Evaluating step(2): 1.0 across 4 samples, Max potential: 1.0: 100%|ββββββββββ| 4/4 [00:00<00:00, 4.20it/s]\n",
- "Proposing: 40%|ββββ | 2/5 [00:10<00:15, 5.11s/it]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Pass full check: 1.0 >= 0.75\n",
- "Done with proposals\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 2287.37it/s]\n",
- "Evaluating step(29): 0.8158 across 38 samples, Max potential: 0.86: 76%|ββββββββ | 38/50 [00:17<00:05, 2.17it/s]\n",
- "Training Step: 30: 23%|βββ | 3/13 [00:35<02:25, 14.59s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Optimizer revert: 0.7948717948717948 <= 0.86\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 268.93it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 3.69it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 603.76it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 8825.47it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Moving batch correct size: 7\n",
- "Moving batch error size: 1\n",
- "Moving batch acc: 0.875\n",
- "Moving batch correct size: 7\n",
- "Moving batch error size: 1\n",
- "Subset Error size: 1\n",
- "Subset Correct size: 2\n",
- "Subset score: 0.6666666666666666\n",
- "Subset batch acc: 0.6666666666666666\n",
- "Subset loss backward...\n",
- "setting pred name Generator_outputy_pred_3 score to 1.0\n",
- "Subset loss backward time: 2.2182435989379883\n",
- "Optimizer propose...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly and ensure you account for all items. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 160.12it/s]\n",
- "Evaluating step(3): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:01<00:00, 1.72it/s]\n",
- "\n",
- "Proposing: 20%|ββ | 1/5 [00:02<00:11, 2.83s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly and ensure each item is counted correctly. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 89.23it/s]\n",
- "Evaluating step(3): 1.0 across 3 samples, Max potential: 1.0: 100%|ββββββββββ| 3/3 [00:01<00:00, 1.66it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Pass subset check: 1.0 > 0.6666666666666666\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 8/8 [00:00<00:00, 281.73it/s]\n",
- "Evaluating step(3): 1.0 across 8 samples, Max potential: 1.0: 100%|ββββββββββ| 8/8 [00:02<00:00, 2.96it/s]\n",
- "Proposing: 20%|ββ | 1/5 [00:08<00:34, 8.54s/it]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Pass full check: 1.0 >= 0.875\n",
- "Done with proposals\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 1910.10it/s]\n",
- "Evaluating step(30): 0.72 across 25 samples, Max potential: 0.86: 50%|βββββ | 25/50 [00:18<00:18, 1.38it/s]\n",
- "Training Step: 31: 31%|βββ | 4/13 [01:05<03:03, 20.39s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Optimizer revert: 0.6923076923076923 <= 0.86\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 310.31it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 3.75it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 454.32it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 12336.19it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Moving batch correct size: 11\n",
- "Moving batch error size: 1\n",
- "Moving batch acc: 0.9166666666666666\n",
- "Moving batch correct size: 11\n",
- "Moving batch error size: 1\n",
- "Subset Error size: 1\n",
- "Subset Correct size: 2\n",
- "Subset score: 0.6666666666666666\n",
- "Subset batch acc: 0.6666666666666666\n",
- "Subset loss backward...\n",
- "setting pred name Generator_outputy_pred_0 score to 1.0\n",
- "Subset loss backward time: 2.028568983078003\n",
- "Optimizer propose...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly and ensure each item is counted correctly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 121.52it/s]\n",
- "Evaluating step(4): 1.0 across 3 samples, Max potential: 1.0: 100%|ββββββββββ| 3/3 [00:01<00:00, 2.10it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Pass subset check: 1.0 > 0.6666666666666666\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 12/12 [00:00<00:00, 724.90it/s]\n",
- "Evaluating step(4): 1.0 across 12 samples, Max potential: 1.0: 100%|ββββββββββ| 12/12 [00:03<00:00, 3.66it/s]\n",
- "Proposing: 0%| | 0/5 [00:05, ?it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Pass full check: 1.0 >= 0.9166666666666666\n",
- "Done with proposals\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 2233.56it/s]\n",
- "Evaluating step(31): 0.8511 across 47 samples, Max potential: 0.86: 94%|ββββββββββ| 47/50 [00:16<00:01, 2.81it/s]\n",
- "Training Step: 32: 38%|ββββ | 5/13 [01:31<02:58, 22.30s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Optimizer revert: 0.8333333333333334 <= 0.86\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 269.31it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 3.20it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 606.49it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 1212.58it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Moving batch correct size: 15\n",
- "Moving batch error size: 1\n",
- "Moving batch acc: 0.9375\n",
- "Moving batch correct size: 15\n",
- "Moving batch error size: 1\n",
- "Subset Error size: 1\n",
- "Subset Correct size: 2\n",
- "Subset score: 0.6666666666666666\n",
- "Subset batch acc: 0.6666666666666666\n",
- "Subset loss backward...\n",
- "setting pred name Generator_outputy_pred_3 score to 1.0\n",
- "setting pred name Generator_outputy_pred_1 score to 1.0\n",
- "Subset loss backward time: 3.2150633335113525\n",
- "Optimizer propose...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 130.57it/s]\n",
- "Evaluating step(5): 0.5 across 2 samples, Max potential: 0.6667: 33%|ββββ | 1/3 [00:01<00:02, 1.39s/it]INFO:backoff:Backing off call(...) for 0.2s (openai.InternalServerError: \n",
- "500 Internal Server Error \n",
- "\n",
- "500 Internal Server Error \n",
- "nginx \n",
- "\n",
- ")\n",
- "Evaluating step(5): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:50<00:00, 16.89s/it]\n",
- "\n",
- "Proposing: 20%|ββ | 1/5 [00:52<03:28, 52.11s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 645.05it/s]\n",
- "Evaluating step(5): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:00<00:00, 298.94it/s]\n",
- "\n",
- "Proposing: 40%|ββββ | 2/5 [00:53<01:07, 22.46s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 751.40it/s]\n",
- "Evaluating step(5): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:00<00:00, 360.88it/s]\n",
- "\n",
- "Proposing: 60%|ββββββ | 3/5 [00:54<00:25, 12.66s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 332.13it/s]\n",
- "Evaluating step(5): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:00<00:00, 276.08it/s]\n",
- "\n",
- "Proposing: 80%|ββββββββ | 4/5 [00:55<00:08, 8.12s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 440.13it/s]\n",
- "Evaluating step(5): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:00<00:00, 235.96it/s]\n",
- "\n",
- "Proposing: 100%|ββββββββββ| 5/5 [00:57<00:00, 11.41s/it]\n",
- "Training Step: 33: 46%|βββββ | 6/13 [02:33<04:07, 35.35s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "Done with proposals\n",
- "No proposal can improve the subset and full set, go to next step\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 317.05it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 676.47it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 543.36it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 1518.44it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Moving batch correct size: 18\n",
- "Moving batch error size: 2\n",
- "Moving batch acc: 0.9\n",
- "Moving batch correct size: 18\n",
- "Moving batch error size: 2\n",
- "Subset Error size: 2\n",
- "Subset Correct size: 4\n",
- "Subset score: 0.6666666666666666\n",
- "Subset batch acc: 0.6666666666666666\n",
- "Subset loss backward...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "setting pred name Generator_outputy_pred_3 score to 1.0\n",
- "setting pred name Generator_outputy_pred_2 score to 0.0\n",
- "setting pred name Generator_outputy_pred_0 score to 1.0\n",
- "setting pred name Generator_outputy_pred_1 score to 1.0\n",
- "Subset loss backward time: 7.857504606246948\n",
- "Optimizer propose...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 6/6 [00:00<00:00, 282.66it/s]\n",
- "Evaluating step(6): 0.5 across 6 samples, Max potential: 0.5: 100%|ββββββββββ| 6/6 [00:02<00:00, 2.75it/s]\n",
- "\n",
- "Proposing: 20%|ββ | 1/5 [00:03<00:13, 3.26s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 6/6 [00:00<00:00, 687.22it/s]\n",
- "Evaluating step(6): 0.5 across 6 samples, Max potential: 0.5: 100%|ββββββββββ| 6/6 [00:00<00:00, 539.26it/s]\n",
- "\n",
- "Proposing: 40%|ββββ | 2/5 [00:04<00:06, 2.16s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 6/6 [00:00<00:00, 608.62it/s]\n",
- "Evaluating step(6): 0.5 across 6 samples, Max potential: 0.5: 100%|ββββββββββ| 6/6 [00:00<00:00, 246.48it/s]\n",
- "\n",
- "Proposing: 60%|ββββββ | 3/5 [00:05<00:03, 1.68s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 6/6 [00:00<00:00, 417.60it/s]\n",
- "Evaluating step(6): 0.5 across 6 samples, Max potential: 0.5: 100%|ββββββββββ| 6/6 [00:00<00:00, 422.96it/s]\n",
- "\n",
- "Proposing: 80%|ββββββββ | 4/5 [00:07<00:01, 1.58s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 6/6 [00:00<00:00, 464.91it/s]\n",
- "Evaluating step(6): 0.5 across 6 samples, Max potential: 0.5: 100%|ββββββββββ| 6/6 [00:00<00:00, 269.93it/s]\n",
- "\n",
- "Proposing: 100%|ββββββββββ| 5/5 [00:08<00:00, 1.67s/it]\n",
- "Training Step: 34: 54%|ββββββ | 7/13 [02:49<02:55, 29.23s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
- "Done with proposals\n",
- "No proposal can improve the subset and full set, go to next step\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 104.68it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 2.42it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 556.85it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 14230.04it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Moving batch correct size: 22\n",
- "Moving batch error size: 2\n",
- "Moving batch acc: 0.9\n",
- "Moving batch correct size: 18\n",
- "Moving batch error size: 2\n",
- "Subset Error size: 2\n",
- "Subset Correct size: 4\n",
- "Subset score: 0.6666666666666666\n",
- "Subset batch acc: 0.6666666666666666\n",
- "Subset loss backward...\n",
- "setting pred name Generator_outputy_pred_2 score to 1.0\n",
- "setting pred name Generator_outputy_pred_3 score to 1.0\n",
- "setting pred name Generator_outputy_pred_0 score to 1.0\n",
- "setting pred name Generator_outputy_pred_1 score to 1.0\n",
- "Subset loss backward time: 6.2225048542022705\n",
- "Optimizer propose...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 6/6 [00:00<00:00, 584.16it/s]\n",
- "Evaluating step(7): 0.5 across 6 samples, Max potential: 0.5: 100%|ββββββββββ| 6/6 [00:01<00:00, 4.41it/s]\n",
- "\n",
- "Proposing: 20%|ββ | 1/5 [00:02<00:10, 2.54s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 6/6 [00:00<00:00, 943.25it/s]\n",
- "Evaluating step(7): 0.5 across 6 samples, Max potential: 0.5: 100%|ββββββββββ| 6/6 [00:00<00:00, 367.37it/s]\n",
- "\n",
- "Proposing: 40%|ββββ | 2/5 [00:03<00:04, 1.65s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 6/6 [00:00<00:00, 802.76it/s]\n",
- "Evaluating step(7): 0.5 across 6 samples, Max potential: 0.5: 100%|ββββββββββ| 6/6 [00:00<00:00, 290.57it/s]\n",
- "\n",
- "Proposing: 60%|ββββββ | 3/5 [00:04<00:02, 1.44s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 6/6 [00:00<00:00, 736.81it/s]\n",
- "Evaluating step(7): 0.5 across 6 samples, Max potential: 0.5: 100%|ββββββββββ| 6/6 [00:00<00:00, 352.92it/s]\n",
- "\n",
- "Proposing: 80%|ββββββββ | 4/5 [00:05<00:01, 1.31s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 6/6 [00:00<00:00, 596.84it/s]\n",
- "Evaluating step(7): 0.5 across 6 samples, Max potential: 0.5: 100%|ββββββββββ| 6/6 [00:00<00:00, 250.75it/s]\n",
- "\n",
- "Proposing: 100%|ββββββββββ| 5/5 [00:07<00:00, 1.45s/it]\n",
- "Training Step: 35: 62%|βββββββ | 8/13 [03:04<02:04, 24.82s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.5 <= 0.6666666666666666\n",
- "Done with proposals\n",
- "No proposal can improve the subset and full set, go to next step\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 70.79it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:02<00:00, 1.78it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 388.55it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 2027.46it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Moving batch correct size: 22\n",
- "Moving batch error size: 2\n",
- "Moving batch acc: 0.9\n",
- "Moving batch correct size: 18\n",
- "Moving batch error size: 2\n",
- "Subset Error size: 2\n",
- "Subset Correct size: 4\n",
- "Subset score: 0.6666666666666666\n",
- "Subset batch acc: 0.6666666666666666\n",
- "Subset loss backward...\n",
- "setting pred name Generator_outputy_pred_1 score to 1.0\n",
- "setting pred name Generator_outputy_pred_1 score to 1.0\n",
- "setting pred name Generator_outputy_pred_2 score to 1.0\n",
- "Subset loss backward time: 5.618266582489014\n",
- "Optimizer propose...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 6/6 [00:00<00:00, 304.00it/s]\n",
- "Evaluating step(8): 0.6667 across 6 samples, Max potential: 0.6667: 100%|ββββββββββ| 6/6 [00:02<00:00, 2.79it/s]\n",
- "\n",
- "Proposing: 20%|ββ | 1/5 [00:03<00:13, 3.44s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly and ensure each item is counted correctly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 6/6 [00:00<00:00, 128.97it/s]\n",
- "Evaluating step(8): 1.0 across 6 samples, Max potential: 1.0: 100%|ββββββββββ| 6/6 [00:01<00:00, 3.62it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Pass subset check: 1.0 > 0.6666666666666666\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 20/20 [00:00<00:00, 649.93it/s]\n",
- "Evaluating step(8): 0.95 across 20 samples, Max potential: 0.95: 100%|ββββββββββ| 20/20 [00:02<00:00, 8.93it/s]\n",
- "Proposing: 20%|ββ | 1/5 [00:08<00:35, 8.79s/it]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Pass full check: 0.95 >= 0.9\n",
- "Done with proposals\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 2667.62it/s]\n",
- "Evaluating step(35): 0.8511 across 47 samples, Max potential: 0.86: 94%|ββββββββββ| 47/50 [00:00<00:00, 559.52it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Optimizer revert: 0.8333333333333334 <= 0.86\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training Step: 36: 69%|βββββββ | 9/13 [03:21<01:29, 22.39s/it]\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 154.85it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 3.33it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 610.06it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 1798.78it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Moving batch correct size: 22\n",
- "Moving batch error size: 2\n",
- "Moving batch acc: 0.95\n",
- "Moving batch correct size: 19\n",
- "Moving batch error size: 1\n",
- "Subset Error size: 1\n",
- "Subset Correct size: 2\n",
- "Subset score: 0.6666666666666666\n",
- "Subset batch acc: 0.6666666666666666\n",
- "Subset loss backward...\n",
- "setting pred name Generator_outputy_pred_2 score to 1.0\n",
- "Subset loss backward time: 2.553833246231079\n",
- "Optimizer propose...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 228.47it/s]\n",
- "Evaluating step(9): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:01<00:00, 2.44it/s]\n",
- "\n",
- "Proposing: 20%|ββ | 1/5 [00:02<00:09, 2.47s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 700.57it/s]\n",
- "Evaluating step(9): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:00<00:00, 207.56it/s]\n",
- "\n",
- "Proposing: 40%|ββββ | 2/5 [00:03<00:05, 1.69s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 782.91it/s]\n",
- "Evaluating step(9): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:00<00:00, 712.51it/s]\n",
- "\n",
- "Proposing: 60%|ββββββ | 3/5 [00:04<00:02, 1.49s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 269.05it/s]\n",
- "Evaluating step(9): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:00<00:00, 266.32it/s]\n",
- "\n",
- "Proposing: 80%|ββββββββ | 4/5 [00:06<00:01, 1.40s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 466.64it/s]\n",
- "Evaluating step(9): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:00<00:00, 498.14it/s]\n",
- "\n",
- "Proposing: 100%|ββββββββββ| 5/5 [00:07<00:00, 1.48s/it]\n",
- "Training Step: 37: 77%|ββββββββ | 10/13 [03:33<00:56, 18.97s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "Done with proposals\n",
- "No proposal can improve the subset and full set, go to next step\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 115.54it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 3.77it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 561.81it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 1002.40it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Moving batch correct size: 23\n",
- "Moving batch error size: 1\n",
- "Moving batch acc: 0.95\n",
- "Moving batch correct size: 19\n",
- "Moving batch error size: 1\n",
- "Subset Error size: 1\n",
- "Subset Correct size: 2\n",
- "Subset score: 0.6666666666666666\n",
- "Subset batch acc: 0.6666666666666666\n",
- "Subset loss backward...\n",
- "setting pred name Generator_outputy_pred_0 score to 1.0\n",
- "Subset loss backward time: 2.35148024559021\n",
- "Optimizer propose...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 139.22it/s]\n",
- "Evaluating step(10): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:00<00:00, 3.95it/s]\n",
- "\n",
- "Proposing: 20%|ββ | 1/5 [00:01<00:07, 1.81s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 277.60it/s]\n",
- "Evaluating step(10): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:00<00:00, 561.39it/s]\n",
- "\n",
- "Proposing: 40%|ββββ | 2/5 [00:02<00:04, 1.42s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 736.01it/s]\n",
- "Evaluating step(10): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:00<00:00, 168.63it/s]\n",
- "\n",
- "Proposing: 60%|ββββββ | 3/5 [00:03<00:02, 1.24s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 441.77it/s]\n",
- "Evaluating step(10): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:00<00:00, 518.09it/s]\n",
- "\n",
- "Proposing: 80%|ββββββββ | 4/5 [00:05<00:01, 1.19s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 396.70it/s]\n",
- "Evaluating step(10): 0.6667 across 3 samples, Max potential: 0.6667: 100%|ββββββββββ| 3/3 [00:00<00:00, 199.84it/s]\n",
- "\n",
- "Proposing: 100%|ββββββββββ| 5/5 [00:06<00:00, 1.27s/it]\n",
- "Training Step: 38: 85%|βββββββββ | 11/13 [03:43<00:32, 16.20s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "Done with proposals\n",
- "No proposal can improve the subset and full set, go to next step\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 138.49it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 6.41it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 610.01it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 10665.74it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Moving batch correct size: 22\n",
- "Moving batch error size: 2\n",
- "Moving batch acc: 0.9\n",
- "Moving batch correct size: 18\n",
- "Moving batch error size: 2\n",
- "Subset Error size: 2\n",
- "Subset Correct size: 4\n",
- "Subset score: 0.6666666666666666\n",
- "Subset batch acc: 0.6666666666666666\n",
- "Subset loss backward...\n",
- "setting pred name Generator_outputy_pred_0 score to 1.0\n",
- "setting pred name Generator_outputy_pred_2 score to 0.0\n",
- "setting pred name Generator_outputy_pred_2 score to 1.0\n",
- "Subset loss backward time: 11.797855138778687\n",
- "Optimizer propose...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Proposing: 0%| | 0/5 [00:00, ?it/s]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 6/6 [00:00<00:00, 221.09it/s]\n",
- "Evaluating step(11): 0.6667 across 6 samples, Max potential: 0.6667: 100%|ββββββββββ| 6/6 [00:01<00:00, 4.45it/s]\n",
- "\n",
- "Proposing: 20%|ββ | 1/5 [00:02<00:09, 2.46s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 6/6 [00:00<00:00, 690.80it/s]\n",
- "Evaluating step(11): 0.6667 across 6 samples, Max potential: 0.6667: 100%|ββββββββββ| 6/6 [00:00<00:00, 309.16it/s]\n",
- "\n",
- "Proposing: 40%|ββββ | 2/5 [00:03<00:04, 1.61s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 6/6 [00:00<00:00, 488.13it/s]\n",
- "Evaluating step(11): 0.6667 across 6 samples, Max potential: 0.6667: 100%|ββββββββββ| 6/6 [00:00<00:00, 365.81it/s]\n",
- "\n",
- "Proposing: 60%|ββββββ | 3/5 [00:04<00:02, 1.36s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 6/6 [00:00<00:00, 693.52it/s]\n",
- "Evaluating step(11): 0.6667 across 6 samples, Max potential: 0.6667: 100%|ββββββββββ| 6/6 [00:00<00:00, 272.61it/s]\n",
- "\n",
- "Proposing: 80%|ββββββββ | 4/5 [00:05<00:01, 1.35s/it]\u001b[A"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Pay special attention to quantities mentioned explicitly, including multiples. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=None, requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 6/6 [00:00<00:00, 767.58it/s]\n",
- "Evaluating step(11): 0.6667 across 6 samples, Max potential: 0.6667: 100%|ββββββββββ| 6/6 [00:00<00:00, 719.89it/s]\n",
- "\n",
- "Proposing: 100%|ββββββββββ| 5/5 [00:07<00:00, 1.41s/it]\n",
- "Training Step: 38: 92%|ββββββββββ| 12/13 [04:02<00:20, 20.21s/it]\n",
- "Epoch: 100%|ββββββββββ| 1/1 [04:02<00:00, 242.58s/it]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail subset check, try next proposal: 0.6666666666666666 <= 0.6666666666666666\n",
- "Done with proposals\n",
- "No proposal can improve the subset and full set, go to next step\n",
- "Reached max steps\n",
- "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n",
- "Configuring teacher generator for Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- ")\n",
- "Teacher generator set: Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- "), teacher Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- ")\n",
- "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n",
- "Configuring teacher generator for Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- ")\n",
- "Teacher generator set: Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- "), teacher Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- ")\n",
- "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n",
- "Configuring teacher generator for Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " You are the feedback engine in an optimization system.\n",
- " \n",
- " Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags.\n",
- " 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable.\n",
- " 2. Feedback examples: \"Since language models have the X failure mode...\", \"Adding X can fix this error because...\", \"Removing X can improve the objective function because...\", \"Changing X to Y would fix the mistake...\"\n",
- " 3. Consider the variable in the context of its peers if provided.\n",
- " Remember:\n",
- " Be concise, critical, and direct.\n",
- " \n",
- " \n",
- " {{conversation_sec}}\n",
- " \n",
- " {{objective_instruction_sec}}\n",
- " , prompt_variables: ['objective_instruction_sec', 'conversation_sec']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- ")\n",
- "Teacher generator set: Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " You are the feedback engine in an optimization system.\n",
- " \n",
- " Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags.\n",
- " 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable.\n",
- " 2. Feedback examples: \"Since language models have the X failure mode...\", \"Adding X can fix this error because...\", \"Removing X can improve the objective function because...\", \"Changing X to Y would fix the mistake...\"\n",
- " 3. Consider the variable in the context of its peers if provided.\n",
- " Remember:\n",
- " Be concise, critical, and direct.\n",
- " \n",
- " \n",
- " {{conversation_sec}}\n",
- " \n",
- " {{objective_instruction_sec}}\n",
- " , prompt_variables: ['objective_instruction_sec', 'conversation_sec']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- "), teacher Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " You are the feedback engine in an optimization system.\n",
- " \n",
- " Your role: Provide intelligent and creative feedback for the variable enclosed in tags, based on the objective specified in tags.\n",
- " 1. Focus on the downstream OBJECTIVE without proposing new versions of the variable.\n",
- " 2. Feedback examples: \"Since language models have the X failure mode...\", \"Adding X can fix this error because...\", \"Removing X can improve the objective function because...\", \"Changing X to Y would fix the mistake...\"\n",
- " 3. Consider the variable in the context of its peers if provided.\n",
- " Remember:\n",
- " Be concise, critical, and direct.\n",
- " \n",
- " \n",
- " {{conversation_sec}}\n",
- " \n",
- " {{objective_instruction_sec}}\n",
- " , prompt_variables: ['objective_instruction_sec', 'conversation_sec']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- ")\n",
- "Teacher generator configured.\n",
- "save to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n",
- "Starting step: 38\n",
- "trainer_results: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training Step: 39: 0%| | 0/12 [00:00, ?it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Training Step: 39\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 161.31it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 54e272c5-1360-462e-b773-4c58c61472ee already exists. Updating the trace.\n",
- "Trace with id 0e8910c8-703d-4766-a483-c5691125fd03 already exists. Updating the trace.\n",
- "Trace with id 6c0d3a9a-bb01-4fb3-a68b-1edf66861235 already exists. Updating the trace.\n",
- "Trace with id fe9b883c-4f47-44f7-a388-b03a2fb10413 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 812.53it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 2283.86it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 11023.14it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 294.28it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 2.11it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 485.47it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 11015.90it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sampled_augmented_demos: ['0e8910c8-703d-4766-a483-c5691125fd03']\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Fridge\\n\\n 2. Chair\\n\\n 3. Bed\\n\\n 4. Oven\\n\\n 5. Microwave\\n\\n 6. Car\\n\\n\\n There are 6 objects in total.\\n\\n\\n Answer: 6'\", requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 3194.64it/s]\n",
- "Evaluating step(39): 0.6818 across 22 samples, Max potential: 0.86: 44%|βββββ | 22/50 [00:15<00:19, 1.45it/s]\n",
- "Training Step: 40: 17%|ββ | 2/12 [00:17<02:58, 17.85s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail validation: 0.6521739130434783 <= 0.86, revert\n",
- "Training Step: 40\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 697.57it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id daa5804f-1aad-4f01-b26c-6b31c57f065f already exists. Updating the trace.\n",
- "Trace with id 71d549d2-9cc8-46ba-a7f6-d07f69263fd3 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 562.43it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id b538075d-01af-4b76-b835-9005f3044609 already exists. Updating the trace.Trace with id fd34672a-ffd1-498e-a88f-283aa9d4f65d already exists. Updating the trace.\n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 577.17it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 9709.04it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 142.07it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 2.41it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 311.77it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 713.44it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them all together:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 3973.84it/s]\n",
- "Evaluating step(40): 0.825 across 40 samples, Max potential: 0.86: 80%|ββββββββ | 40/50 [00:00<00:00, 440.54it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail validation: 0.8048780487804879 <= 0.86, revert\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Training Step: 41: 33%|ββββ | 4/12 [00:22<00:49, 6.19s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Training Step: 41\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 155.20it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 840d9ed5-8222-45a9-a406-7445feae9733 already exists. Updating the trace.\n",
- "Trace with id 4cd9f4ec-2648-4e85-8e17-3dae1b8558d3 already exists. Updating the trace.\n",
- "Trace with id ac43f3d4-d67d-4912-95d6-0baa09b52d9a already exists. Updating the trace.\n",
- "Trace with id 1eb770ed-ff6f-481e-8c16-b9749a44a1a6 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 1098.13it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 521.96it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 10292.77it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 172.25it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 2.39it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 587.31it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 1397.05it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them all together:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 3444.16it/s]\n",
- "Evaluating step(41): 0.825 across 40 samples, Max potential: 0.86: 80%|ββββββββ | 40/50 [00:00<00:00, 318.28it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail validation: 0.8048780487804879 <= 0.86, revert\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Training Step: 42: 42%|βββββ | 5/12 [00:24<00:25, 3.71s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Training Step: 42\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 268.35it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 4dad0f65-d624-48c2-a795-596c00b0535a already exists. Updating the trace.\n",
- "Trace with id dd9d8748-4926-4bcd-902d-6a4c5cb38267 already exists. Updating the trace.\n",
- "Trace with id 1f682cab-026c-4803-8018-a45d027aa026 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 522.44it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 344.49it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 14755.69it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 93.06it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 2.03it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 454.94it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 5319.35it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them all together:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 4718.96it/s]\n",
- "Evaluating step(42): 0.825 across 40 samples, Max potential: 0.86: 80%|ββββββββ | 40/50 [00:00<00:00, 404.64it/s]\n",
- "Training Step: 43: 58%|ββββββ | 7/12 [00:27<00:17, 3.51s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail validation: 0.8048780487804879 <= 0.86, revert\n",
- "Training Step: 43\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 261.59it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 46a8994f-fce6-4031-b251-1c8af31d88d2 already exists. Updating the trace.Trace with id 3a9a47c8-a210-43a4-8d24-b9159babb6e4 already exists. Updating the trace.\n",
- "\n",
- "Trace with id 234e39df-1bc4-41df-a515-895cb2614a53 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 428.10it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id de4e75d6-a21b-4004-925d-a9a818bd0f7c already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 296.10it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 11374.38it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 239.89it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 2.62it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 447.30it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 475.76it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them all together:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 4960.15it/s]\n",
- "Evaluating step(43): 0.825 across 40 samples, Max potential: 0.86: 80%|ββββββββ | 40/50 [00:00<00:00, 464.52it/s]\n",
- "Training Step: 44: 67%|βββββββ | 8/12 [00:30<00:10, 2.51s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail validation: 0.8048780487804879 <= 0.86, revert\n",
- "Training Step: 44\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 237.83it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id aefd17e5-9682-4420-a820-c484a63d6dcd already exists. Updating the trace.\n",
- "Trace with id 2bc992c0-9832-47f1-87c3-9f6e4b18ee99 already exists. Updating the trace.Trace with id 945f82c7-03d9-4f49-8267-be7abac2bce6 already exists. Updating the trace.\n",
- "Trace with id 12a6ff3d-f54d-4d89-b5f0-1aec30e96398 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 1138.91it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 394.77it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 443.51it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 247.66it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 2.52it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 373.33it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 830.43it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sampled_augmented_demos: ['aefd17e5-9682-4420-a820-c484a63d6dcd']\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each vegetable step by step:\\n\\n\\n 1. Carrot: 1\\n\\n 2. Onion: 1\\n\\n 3. Stalk of celery: 1\\n\\n 4. Yams: 3\\n\\n 5. Garlic: 1\\n\\n 6. Head of broccoli: 1\\n\\n 7. Potato: 1\\n\\n\\n Now, let''s add them up:\\n\\n\\n 1 + 1 + 1 + 3 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 1197.95it/s]\n",
- "Evaluating step(44): 0.8333 across 42 samples, Max potential: 0.86: 84%|βββββββββ | 42/50 [00:22<00:04, 1.87it/s]\n",
- "Training Step: 45: 75%|ββββββββ | 9/12 [00:57<00:24, 8.31s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail validation: 0.813953488372093 <= 0.86, revert\n",
- "Training Step: 45\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 164.91it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 701be0ee-29e0-42f5-be04-72d2b73e3968 already exists. Updating the trace.\n",
- "Trace with id e2bfbbe0-fb79-4df5-9a7d-50c9085947bc already exists. Updating the trace.\n",
- "Trace with id d4194dd1-739a-4509-8ac8-7c3f89649ee7 already exists. Updating the trace.\n",
- "Trace with id 1b4b3ab0-d20f-4fc2-a09c-4592a227a8e5 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 731.86it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 244.23it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 395.27it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 140.54it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 3.11it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 448.16it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 658.37it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them all together:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 2621.44it/s]\n",
- "Evaluating step(45): 0.825 across 40 samples, Max potential: 0.86: 80%|ββββββββ | 40/50 [00:00<00:00, 306.53it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail validation: 0.8048780487804879 <= 0.86, revert\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Training Step: 46: 92%|ββββββββββ| 11/12 [00:59<00:06, 6.78s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Training Step: 46\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 256.89it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 82cf82ff-d826-4bb1-847c-9938aeec8ff5 already exists. Updating the trace.\n",
- "Trace with id 5124e2e6-2aac-4dd3-ab63-9277a7b806a7 already exists. Updating the trace.Trace with id a9a202f5-e723-4d24-ae5e-ad1084a52ef8 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 426.47it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id d3f33ded-170a-4b87-9b0b-987d5fb7b817 already exists. Updating the trace.\n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 266.65it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 380.40it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 251.95it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 3.75it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 411.12it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 511.05it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sampled_augmented_demos: ['aefd17e5-9682-4420-a820-c484a63d6dcd']\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each vegetable step by step:\\n\\n\\n 1. Carrot: 1\\n\\n 2. Onion: 1\\n\\n 3. Stalk of celery: 1\\n\\n 4. Yams: 3\\n\\n 5. Garlic: 1\\n\\n 6. Head of broccoli: 1\\n\\n 7. Potato: 1\\n\\n\\n Now, let''s add them up:\\n\\n\\n 1 + 1 + 1 + 3 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 4016.92it/s]\n",
- "Evaluating step(46): 0.8333 across 42 samples, Max potential: 0.86: 84%|βββββββββ | 42/50 [00:00<00:00, 303.81it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail validation: 0.813953488372093 <= 0.86, revert\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Training Step: 47: 100%|ββββββββββ| 12/12 [01:01<00:00, 4.42s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Training Step: 47\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 96.23it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 85d63f78-39c0-4753-a9fc-52202df48673 already exists. Updating the trace.Trace with id 74d1bc97-46cd-406d-8c3a-2f999aae1b2f already exists. Updating the trace.\n",
- "\n",
- "Trace with id 60866bed-8020-4610-a39a-a4a730c035db already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 341.47it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 1d3eceeb-ad24-40f6-8752-2f38241172cb already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 167.75it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 846.95it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 136.09it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:04<00:00, 1.03s/it]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 191.47it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 923.91it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them all together:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 2836.52it/s]\n",
- "Evaluating step(47): 0.825 across 40 samples, Max potential: 0.86: 80%|ββββββββ | 40/50 [00:00<00:00, 371.59it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail validation: 0.8048780487804879 <= 0.86, revert\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Training Step: 48: : 13it [01:07, 4.63s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Training Step: 48\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 189.96it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id 96c716a1-e984-4fe3-9ce0-e156ac709edb already exists. Updating the trace.\n",
- "Trace with id 3835ee47-6951-49ec-b285-621fc1085024 already exists. Updating the trace.Trace with id 99607986-e107-46b8-b86b-177b295983c4 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 295.41it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "Trace with id d46e538c-832d-4eb5-ba9b-a308f666baba already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 161.24it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 1621.93it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 153.47it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 2.07it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 207.08it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 344.25it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sampled_augmented_demos: ['99607986-e107-46b8-b86b-177b295983c4']\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s list and count the vegetables mentioned:\\n\\n\\n 1. Yam\\n\\n 2. Cauliflower\\n\\n 3. Cabbage (two cabbages)\\n\\n 4. Garlic\\n\\n 5. Carrot\\n\\n 6. Broccoli (head of broccoli)\\n\\n 7. Potato\\n\\n 8. Celery (stalk of celery)\\n\\n 9. Lettuce (lettuce head)\\n\\n\\n Now, let''s count each vegetable:\\n\\n\\n 1. Yam: 1\\n\\n 2. Cauliflower: 1\\n\\n 3. Cabbages: 2\\n\\n 4. Garlic: 1\\n\\n 5. Carrot: 1\\n\\n 6. Broccoli: 1\\n\\n 7. Potato: 1\\n\\n 8. Celery: 1\\n\\n 9. Lettuce: 1\\n\\n\\n Adding them up:\\n\\n\\n 1 + 1 + 2 + 1 + 1 + 1 + 1 + 1 + 1 = 10\\n\\n\\n Answer: 10'\", requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 1430.74it/s]\n",
- "Evaluating step(48): 0.825 across 40 samples, Max potential: 0.86: 80%|ββββββββ | 40/50 [00:28<00:07, 1.41it/s]\n",
- "Training Step: 49: : 14it [01:39, 11.59s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail validation: 0.8048780487804879 <= 0.86, revert\n",
- "Training Step: 49\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 122.71it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id ffe67a7b-7b81-4302-b6ed-4b506570274b already exists. Updating the trace.\n",
- "Trace with id 8895d6bd-eab0-48af-ad4b-51f8007258b1 already exists. Updating the trace.\n",
- "Trace with id d71ad721-d21d-42f1-af9b-719ff026406b already exists. Updating the trace.Trace with id e250f80e-334e-4f85-ac1f-df9a2013d578 already exists. Updating the trace.\n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 100%|ββββββββββ| 4/4 [00:00<00:00, 421.38it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 121.46it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 1767.14it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 166.47it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 2.02it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 206.20it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 983.31it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them all together:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 3168.14it/s]\n",
- "Evaluating step(49): 0.825 across 40 samples, Max potential: 0.86: 80%|ββββββββ | 40/50 [00:00<00:00, 492.44it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail validation: 0.8048780487804879 <= 0.86, revert\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Training Step: 50: : 16it [01:42, 9.33s/it]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Training Step: 50\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 108.30it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Trace with id c42fea48-1b90-4388-92c4-b65b4356a3a2 already exists. Updating the trace.\n",
- "Trace with id 660c5004-35d2-4a6d-9a06-1e0b3f032f21 already exists. Updating the trace.\n",
- "Trace with id 7694df14-3a24-40bd-a3fa-036c2645eca3 already exists. Updating the trace.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Training: 100%|ββββββββββ| 4/4 [00:03<00:00, 1.04it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 220.83it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 1212.75it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 90.57it/s]\n",
- "Training: 100%|ββββββββββ| 4/4 [00:01<00:00, 2.12it/s]\n",
- "\n",
- "Loading Data: 100%|ββββββββββ| 4/4 [00:00<00:00, 208.93it/s]\n",
- "Calculating Loss: 100%|ββββββββββ| 4/4 [00:00<00:00, 1002.82it/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "sampled_augmented_demos: ['fd34672a-ffd1-498e-a88f-283aa9d4f65d']\n",
- "New prompts: [PromptData(id='a530c025-f25c-4423-b146-215ff73586f4', name='llm_counter.system_prompt', data=\"You will answer a reasoning question. Carefully count each item and verify your total. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", requires_opt=True), PromptData(id='0b4dc918-1afb-4f03-9193-90ec51a9abab', name='llm_counter.few_shot_demos', data=\"Example: 'Let''s count each item step by step:\\n\\n\\n 1. Microwave: 1\\n\\n 2. Lamp: 1\\n\\n 3. Cars: 4\\n\\n 4. Stove: 1\\n\\n 5. Toaster: 1\\n\\n 6. Bed: 1\\n\\n\\n Now, add them all together:\\n\\n\\n 1 + 1 + 4 + 1 + 1 + 1 = 9\\n\\n\\n Answer: 9'\", requires_opt=True)]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Loading Data: 100%|ββββββββββ| 50/50 [00:00<00:00, 2261.91it/s]\n",
- "Evaluating step(50): 0.825 across 40 samples, Max potential: 0.86: 80%|ββββββββ | 40/50 [00:00<00:00, 281.78it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Fail validation: 0.8048780487804879 <= 0.86, revert\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "Training Step: 50: 100%|ββββββββββ| 12/12 [01:49<00:00, 9.15s/it]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Saved ckpt to /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n",
- "Training time: 352.5873613357544s\n",
- "ckpt_file: /content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\n"
- ]
- }
- ],
- "source": [
- "\n",
- "ckpt_path = \"/content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\"\n",
- "\n",
- "train(debug=False, max_steps=12, strategy=\"constrained\",\n",
- " raw_shots=0, bootstrap_shots=1,\n",
- " resume_from_ckpt=ckpt_path,\n",
- " exclude_input_fields_from_bootstrap_demos=True)"
- ]
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "Loading Data: 100%|ββββββββββ| 3/3 [00:00<00:00, 225.14it/s]\n",
+ "Evaluating step(4): 1.0 across 3 samples, Max potential: 1.0: 100%|ββββββββββ| 3/3 [00:01<00:00, 2.43it/s]\n"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "m5fZGQqLE78r"
- },
- "source": [
- "I decide to try more, this time, using strategy \"random\". And in the bootstrap demo, there is one shot, but I ensure I also add the \"input\" in the demonstration."
- ]
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Pass subset check: 1.0 > 0.6666666666666666\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "78JAv4ULEn07",
- "outputId": "e87bb360-fc26-4dbd-d163-86ab32c292df"
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "WARNING:adalflow.core.generator:Error copying the prompt_kwargs: 'prompt' is not a valid ParameterType\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-3.5-turbo.db\n",
- "ObjectCountAdalComponent(\n",
- " eval_fn: compute_single_item, backward_engine: None, backward_engine_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, teacher_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, text_optimizer_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}\n",
- " (task): ObjectCountTaskPipeline(\n",
- " (llm_counter): Generator(\n",
- " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- " )\n",
- " )\n",
- " (loss_fn): EvalFnToTextLoss()\n",
- ")\n",
- "Trainer(\n",
- " (adaltask): ObjectCountAdalComponent(\n",
- " eval_fn: compute_single_item, backward_engine: None, backward_engine_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, teacher_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}, text_optimizer_model_config: {'model_client': OpenAIClient(), 'model_kwargs': {'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}}\n",
- " (task): ObjectCountTaskPipeline(\n",
- " (llm_counter): Generator(\n",
- " model_kwargs={'model': 'gpt-3.5-turbo', 'max_tokens': 2000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': None}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- " )\n",
- " )\n",
- " (loss_fn): EvalFnToTextLoss()\n",
- " )\n",
- ")\n",
- "raw_shots: 0, bootstrap_shots: 1\n",
- "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n",
- "cache_path: /root/.adalflow/cache_OpenAIClient_gpt-4o.db\n",
- "Configuring teacher generator for Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: \n",
- " {{system_prompt}}\n",
- " {# Few shot demos #}\n",
- " {% if few_shot_demos is not none %}\n",
- " Here are some examples:\n",
- " {{few_shot_demos}}\n",
- " {% endif %}\n",
- " \n",
- " \n",
- " {{input_str}}\n",
- " \n",
- " , prompt_kwargs: {'system_prompt': \"You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.\", 'few_shot_demos': 'None'}, prompt_variables: ['input_str', 'few_shot_demos', 'system_prompt']\n",
- " )\n",
- " (model_client): OpenAIClient()\n",
- " (output_processors): ParseIntegerAnswerComponent(fun_name=parse_integer_answer)\n",
- ")\n",
- "Teacher generator set: Generator(\n",
- " model_kwargs={'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 0.99, 'frequency_penalty': 0, 'presence_penalty': 0, 'stop': None}, \n",
- " (prompt): Prompt(\n",
- " template: