From 66390d6557f151b297192f6653d27b4fee7dda31 Mon Sep 17 00:00:00 2001
From: Li Yin
Date: Wed, 18 Dec 2024 09:29:25 -0800
Subject: [PATCH] add scripts to summarize multiple runs
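
Summary of changes:

- adalflow/optim/parameter.py: tag component-subgraph nodes as INPUT or
  COMPONENT and record edge order with a global traversal counter instead
  of recursion depth.
- adalflow/optim/trainer/trainer.py: add correct_val_score_threshold
  (default 0.5) so fractional accuracy scores can be split into correct
  and error samples in the moving batch.
- use_cases/classification/train_string_output.py: fix the config import path.
- use_cases/question_answering/bbh/object_count/train_new.py: expose the
  training strategy and the checkpoint output path through argparse
  instead of sys.argv.
- use_cases/text_grad_2.0_train.py: run each experiment num_runs times,
  summarize highest/mean/std test score and pass-rate statistics, and
  save everything to results.json.

Illustrative usage, assuming the scripts are run from the repository root
(my_ckpt.json below is only a placeholder output path):

    python use_cases/question_answering/bbh/object_count/train_new.py \
        --strategy random my_ckpt.json
    python use_cases/text_grad_2.0_train.py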
---
adalflow/adalflow/optim/parameter.py | 82 +++++------------
adalflow/adalflow/optim/trainer/trainer.py | 21 ++++-
.../classification/train_string_output.py | 2 +-
.../bbh/object_count/train_new.py | 28 ++++--
use_cases/text_grad_2.0_train.py | 88 ++++++++++++++++++-
5 files changed, 148 insertions(+), 73 deletions(-)
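
Note: the summary loop added to use_cases/text_grad_2.0_train.py reads a
small set of fields from each run's checkpoint JSON. A minimal sketch of
those fields, with field names taken from the code below and purely
illustrative values:

    {
      "test_scores": [0.82, 0.84, 0.90],
      "effective_measures": {
        "subset": {"pass": 3, "fail": 1},
        "valset": {"pass": 2, "fail": 2}
      }
    }

If effective_measures is absent, the script falls back to estimating the
pass count from the number of distinct test scores.
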
diff --git a/adalflow/adalflow/optim/parameter.py b/adalflow/adalflow/optim/parameter.py
index 1e553302..87b90675 100644
--- a/adalflow/adalflow/optim/parameter.py
+++ b/adalflow/adalflow/optim/parameter.py
@@ -52,6 +52,9 @@ class ComponentNode:
id: str = field(metadata={"desc": "The unique id of the component"})
name: str = field(metadata={"desc": "The name of the component"})
+ type: Literal["INPUT", "COMPONENT"] = field(
+ metadata={"desc": "The type of the node"}, default="COMPONENT"
+ )
@dataclass
@@ -1052,8 +1055,10 @@ def draw_component_subgraph(
for node in component_nodes:
node_label = f"""
- ID: | {node.id} |
- Name: | {node.name} |
"""
+ Name: | {node.name} |
+ TYPE: | {node.type} |
+ """
+ # ID: | {node.id} |
# add the list of orders
if node.id in component_nodes_orders:
@@ -1068,7 +1073,7 @@ def draw_component_subgraph(
# Add edges with order labels
for source_id, target_id, edge_order in edges:
- dot.edge(source_id, target_id, label=str(edge_order), color="black")
+ dot.edge(source_id, target_id) # , label=str(edge_order), color="black")
# Step 3: Save and render
dot.render(filepath, cleanup=True)
@@ -1113,58 +1118,6 @@ def traverse(node: "Parameter"):
traverse(self)
return output_nodes, edges
- # def _collect_output_subgraph(
- # self,
- # ) -> Tuple[Set[Tuple[str, str]], List[Tuple[str, str]]]:
- # """
- # Collect OUTPUT nodes and their relationships using component_trace information.
-
- # Returns:
- # nodes (Set[Tuple[str, str]]): Set of component nodes (component_id, label).
- # edges (List[Tuple[str, str]]): Edges between component IDs.
- # """
- # component_nodes = set() # To store component nodes as (component_id, label)
- # edges = [] # To store edges between components
-
- # visited = set() # Track visited parameters to avoid cycles
-
- # def traverse(node: "Parameter"):
- # if node in visited:
- # return
- # visited.add(node)
-
- # # Only consider OUTPUT-type parameters
- # if (
- # node.param_type == ParameterType.OUTPUT
- # or "OUTPUT" in node.param_type.name
- # ):
- # component_id = node.component_trace.id
- # component_name = node.component_trace.name or "Unknown Component"
- # label = f"{component_name}\nID: {component_id}"
-
- # # Add the component as a node
- # component_nodes.add((component_id, label))
-
- # # Traverse predecessors and add edges
- # for pred in node.predecessors:
- # if pred.param_type == ParameterType.OUTPUT:
- # pred_id = pred.component_trace.id
- # pred_name = pred.component_trace.name or "Unknown Component"
-
- # # Add predecessor as a node
- # pred_label = f"{pred_name}\nID: {pred_id}"
- # component_nodes.add((pred_id, pred_label))
-
- # # Add edge between components
- # edges.append((pred_id, component_id))
-
- # # Recursive traversal
- # traverse(pred)
-
- # # Start traversal from the current parameter
- # traverse(self)
- # return component_nodes, edges
-
def _collect_component_subgraph(
self,
) -> Tuple[Set[ComponentNode], List[Tuple[str, str]]]:
@@ -1184,7 +1137,7 @@ def _collect_component_subgraph(
visited = set() # Track visited parameters to avoid cycles
edge_counter = [0] # Mutable counter for edge order tracking
- def traverse(node: "Parameter", depth: int):
+ def traverse(node: "Parameter"):
if node in visited:
return
visited.add(node)
@@ -1211,14 +1164,25 @@ def traverse(node: "Parameter", depth: int):
pred.param_type == ParameterType.OUTPUT
or "OUTPUT" in pred.param_type.name
):
- edges.append((pred_id, component_id, depth))
+ edges.append((pred_id, component_id, edge_counter[0]))
component_nodes.add(ComponentNode(id=pred_id, name=pred_name))
edge_counter[0] += 1
- traverse(pred, depth + 1)
+ if pred.param_type == ParameterType.INPUT:
+ pred_id = pred.id
+ pred_name = pred.name
+ pred_node = ComponentNode(
+ id=pred_id, name=pred_name, type="INPUT"
+ )
+ component_nodes.add(pred_node)
+ # add an edge from input to the first output
+ edges.append((pred_id, component_id, edge_counter[0]))
+ edge_counter[0] += 1
+
+ traverse(pred)
# Start traversal from the current parameter
- traverse(self, depth=0)
+ traverse(self)
# Reverse the edge order
# total_edges = len(edges)
# edges = [
diff --git a/adalflow/adalflow/optim/trainer/trainer.py b/adalflow/adalflow/optim/trainer/trainer.py
index 91a2fd16..833dd1c3 100644
--- a/adalflow/adalflow/optim/trainer/trainer.py
+++ b/adalflow/adalflow/optim/trainer/trainer.py
@@ -91,6 +91,9 @@ class Trainer(Component):
batch_val_score_threshold: Optional[float] = (
1.0 # when acc_score >= this threshold, skip this batch
)
+ correct_val_score_threshold: Optional[float] = (
+ 0.5 # when acc_score > this threshold, the sample is considered correct
+ )
max_error_samples: Optional[int] = 2
max_correct_samples: Optional[int] = 2
debug: bool = False
@@ -106,6 +109,7 @@ def __init__(
num_workers: int = 4,
ckpt_path: str = None,
batch_val_score_threshold: Optional[float] = 1.0,
+ correct_val_score_threshold: Optional[float] = 0.5,
max_error_samples: Optional[int] = 2,
max_correct_samples: Optional[int] = 2,
max_proposals_per_step: int = 5,
@@ -140,6 +144,7 @@ def __init__(
self.val_dataset = val_dataset
self.test_dataset = test_dataset
self.batch_val_score_threshold = batch_val_score_threshold
+ self.correct_val_score_threshold = correct_val_score_threshold
self.max_error_samples = max_error_samples
self.max_correct_samples = max_correct_samples
self.max_proposals_per_step = max_proposals_per_step
@@ -1680,10 +1685,18 @@ def _moving_batch_sample(
- # ensure only 0 and 1 in the acc_score_list
+ # ensure all scores in acc_score_list fall within [0, 1]
import numpy as np
- if not all([score in [0, 1] for score in acc_score_list]):
+ if not all(0 <= score <= 1 for score in acc_score_list):
- raise ValueError("acc_score_list should only contain 0 and 1")
+ raise ValueError("acc_score_list values must be between 0 and 1")
- correct_indices = [i for i, score in enumerate(acc_score_list) if score == 1]
- error_indices = [i for i, score in enumerate(acc_score_list) if score == 0]
+ correct_indices = [
+ i
+ for i, score in enumerate(acc_score_list)
+ if score > self.correct_val_score_threshold
+ ]
+ error_indices = [
+ i
+ for i, score in enumerate(acc_score_list)
+ if score <= self.correct_val_score_threshold
+ ]
print(f"Moving batch correct size: {len(correct_indices)}")
print(f"Moving batch error size: {len(error_indices)}")
if len(error_indices) == 0:
@@ -1984,6 +1997,8 @@ def _fit_text_grad_constraint(
**step_result,
)
+ # reset the moving batch
+
all_samples, all_losses, all_y_preds = [], [], []
else:
diff --git a/use_cases/classification/train_string_output.py b/use_cases/classification/train_string_output.py
index 9ecdef27..45fe5bcf 100644
--- a/use_cases/classification/train_string_output.py
+++ b/use_cases/classification/train_string_output.py
@@ -7,7 +7,7 @@
from use_cases.classification.data import load_datasets, TRECExtendedData
from adalflow.eval.answer_match_acc import AnswerMatchAcc
-from LightRAG.use_cases.config import (
+from use_cases.config import (
gpt_3_model,
gpt_4o_model,
)
diff --git a/use_cases/question_answering/bbh/object_count/train_new.py b/use_cases/question_answering/bbh/object_count/train_new.py
index 48309aa7..5cf29ae6 100644
--- a/use_cases/question_answering/bbh/object_count/train_new.py
+++ b/use_cases/question_answering/bbh/object_count/train_new.py
@@ -140,20 +140,36 @@ def train(
if __name__ == "__main__":
- import sys
import json
+ # make the strategy configurable in the script
+ import argparse
+
+ parser = argparse.ArgumentParser()
+
+ parser.add_argument("--strategy", type=str, default="random")
+ parser.add_argument(
+ "output_path", nargs="?", help="File path to save the checkpoint"
+ )
+
+ args = parser.parse_args()
+
+ set_strategy = args.strategy
+ set_output_path = args.output_path
+
ckpt = train(
debug=False,
- max_steps=12,
- strategy="constrained",
+ max_steps=2,
+ strategy=set_strategy,
exclude_input_fields_from_bootstrap_demos=True,
)
print(f"ckpt: {ckpt}")
- # Save ckpt to a file passed as an argument
- if len(sys.argv) > 1: # Check if a file path is provided
- with open(sys.argv[1], "w") as f:
+ if set_output_path:
+ with open(set_output_path, "w") as f:
json.dump({"ckpt": ckpt}, f)
+ print(f"Checkpoint saved to {set_output_path}")
+ else:
+ print("No file path provided for saving the checkpoint.")
# train_diagnose(**gpt_3_model)
# train_diagnose_teacher(**gpt_4o_model) # 4omini works well as an optimizer too
diff --git a/use_cases/text_grad_2.0_train.py b/use_cases/text_grad_2.0_train.py
index 37ff320d..1a029d53 100644
--- a/use_cases/text_grad_2.0_train.py
+++ b/use_cases/text_grad_2.0_train.py
@@ -2,6 +2,7 @@
import tempfile
import json
+num_runs = 2
# List of experiments to run
object_count = "use_cases/question_answering/bbh/object_count/train_new.py"
hotpot_qa_multi_hop_rag = "benchmarks/hotpot_qa/adal_exp/train_multi_hop_rag.py"
@@ -14,7 +15,7 @@
# Optional: Arguments for each experiment (if needed)
experiment_args = {
- object_count: "",
+ object_count: "--strategy random",
# hotpot_qa_multi_hop_rag: "",
}
ckpt_values = {}
@@ -49,10 +50,89 @@ def run_experiment(script, args):
if __name__ == "__main__":
for experiment in experiments:
args = experiment_args.get(experiment, "")
- ckpt = run_experiment(experiment, args)
- if ckpt:
- ckpt_values[experiment] = ckpt
+ for i in range(num_runs):
+ print(f"\nRun {i + 1}/{num_runs}")
+ ckpt = run_experiment(experiment, args)
+ ckpt_index = f"{experiment}_{i + 1}"
+ if ckpt:
+ ckpt_values[ckpt_index] = ckpt
+ # load all json files using the ckpt paths
+ highest_test_score, mean_test_score, standard_deviation = 0, 0, 0
+ past_highest_scores = []
+ # average pass rate, average pass prompts
+ average_pass_rate_list = []
+ average_pass_prompts_list = []
+ average_total_prompts = []
+ total_prompts = 0
+ highest_test_score_json_file = None
+ for experiment_index, ckpt in ckpt_values.items():
+ with open(ckpt, "r") as f:
+ data = json.load(f)
+ print(f"Experiment: {experiment_index}")
+ print(f"Data: {data}")
+ _high_test_score = max(data["test_scores"])
+ print(f" test score: {data["test_scores"]}")
+ past_highest_scores.append(_high_test_score)
+ if _high_test_score > highest_test_score:
+ highest_test_score = _high_test_score
+ highest_test_score_json_file = ckpt
+ # read the effective measures
+ effective_measures = data.get("effective_measures", {})
+ if not effective_measures:
+ total_prompts = len(data["test_scores"]) - 1
+ # approximate the pass count by the number of distinct test scores (excluding the initial one)
+ pass_num = len(set(data["test_scores"])) - 1
+ average_pass_rate = pass_num / total_prompts
+ average_pass_rate_list.append(average_pass_rate)
+ average_pass_prompts_list.append(pass_num)
+ average_total_prompts.append(total_prompts)
+ else:
+ total_prompts = (
+ effective_measures["subset"]["pass"]
+ + effective_measures["subset"]["fail"]
+ )
+
+ pass_num = effective_measures["valset"]["pass"]
+ total_val_prompts = (
+ effective_measures["valset"]["pass"]
+ + effective_measures["valset"]["fail"]
+ )
+ average_pass_rate = pass_num / total_val_prompts
+ average_pass_rate_list.append(average_pass_rate)
+ average_pass_prompts_list.append(pass_num)
+ average_total_prompts.append(total_prompts)
+ # calculate the mean test score
+ mean_test_score = sum(past_highest_scores) / len(past_highest_scores)
+ # calculate the standard deviation
+ standard_deviation = sum(
+ [(x - mean_test_score) ** 2 for x in past_highest_scores]
+ ) / len(past_highest_scores)
+ standard_deviation = standard_deviation**0.5
+ # calculate the average pass rate
+ average_pass_rate = sum(average_pass_rate_list) / len(average_pass_rate_list)
+ # calculate the average pass prompts
+ average_pass_prompts = sum(average_pass_prompts_list) / len(
+ average_pass_prompts_list
+ )
+ # calculate the average total prompts
+ average_total_prompts = sum(average_total_prompts) / len(average_total_prompts)
+
+ # add these numbers in the ckpt_values
+ index = f"{experiment}_summary"
+ ckpt_values[index] = {
+ "highest_test_score": highest_test_score,
+ "mean_test_score": mean_test_score,
+ "standard_deviation": standard_deviation,
+ "highest_test_score_json_file": highest_test_score_json_file,
+ "average_pass_rate": average_pass_rate,
+ "average_pass_prompts": average_pass_prompts,
+ "average_total_prompts": average_total_prompts,
+ }
print("\nAll Checkpoints:")
for experiment, ckpt in ckpt_values.items():
print(f"{experiment}: {ckpt}")
+
+ # Save the results to a file
+ with open("results.json", "w") as f:
+ json.dump(ckpt_values, f)