Skip to content

Commit

Permalink
add scripts to summarize multiple runs
Browse files Browse the repository at this point in the history
  • Loading branch information
liyin2015 committed Dec 18, 2024
1 parent 2aac4ff commit 66390d6
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 73 deletions.
82 changes: 23 additions & 59 deletions adalflow/adalflow/optim/parameter.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ class ComponentNode:

id: str = field(metadata={"desc": "The unique id of the component"})
name: str = field(metadata={"desc": "The name of the component"})
type: Literal["INPUT", "COMPONENT"] = field(
metadata={"desc": "The type of the node"}, default="COMPONENT"
)


@dataclass
Expand Down Expand Up @@ -1052,8 +1055,10 @@ def draw_component_subgraph(
for node in component_nodes:
node_label = f"""
<table border="0" cellborder="1" cellspacing="0">
<tr><td><b>ID:</b></td><td>{node.id}</td></tr>
<tr><td><b>Name:</b></td><td>{node.name}</td></tr>"""
<tr><td><b>Name:</b></td><td>{node.name}</td></tr>
<tr><td><b>TYPE:</b></td><td>{node.type}</td></tr>
"""
# <tr><td><b>ID:</b></td><td>{node.id}</td></tr>

# add the list of orders
if node.id in component_nodes_orders:
Expand All @@ -1068,7 +1073,7 @@ def draw_component_subgraph(

# Add edges with order labels
for source_id, target_id, edge_order in edges:
dot.edge(source_id, target_id, label=str(edge_order), color="black")
dot.edge(source_id, target_id) # , label=str(edge_order), color="black")

# Step 3: Save and render
dot.render(filepath, cleanup=True)
Expand Down Expand Up @@ -1113,58 +1118,6 @@ def traverse(node: "Parameter"):
traverse(self)
return output_nodes, edges

# def _collect_output_subgraph(
# self,
# ) -> Tuple[Set[Tuple[str, str]], List[Tuple[str, str]]]:
# """
# Collect OUTPUT nodes and their relationships using component_trace information.

# Returns:
# nodes (Set[Tuple[str, str]]): Set of component nodes (component_id, label).
# edges (List[Tuple[str, str]]): Edges between component IDs.
# """
# component_nodes = set() # To store component nodes as (component_id, label)
# edges = [] # To store edges between components

# visited = set() # Track visited parameters to avoid cycles

# def traverse(node: "Parameter"):
# if node in visited:
# return
# visited.add(node)

# # Only consider OUTPUT-type parameters
# if (
# node.param_type == ParameterType.OUTPUT
# or "OUTPUT" in node.param_type.name
# ):
# component_id = node.component_trace.id
# component_name = node.component_trace.name or "Unknown Component"
# label = f"{component_name}\nID: {component_id}"

# # Add the component as a node
# component_nodes.add((component_id, label))

# # Traverse predecessors and add edges
# for pred in node.predecessors:
# if pred.param_type == ParameterType.OUTPUT:
# pred_id = pred.component_trace.id
# pred_name = pred.component_trace.name or "Unknown Component"

# # Add predecessor as a node
# pred_label = f"{pred_name}\nID: {pred_id}"
# component_nodes.add((pred_id, pred_label))

# # Add edge between components
# edges.append((pred_id, component_id))

# # Recursive traversal
# traverse(pred)

# # Start traversal from the current parameter
# traverse(self)
# return component_nodes, edges

def _collect_component_subgraph(
self,
) -> Tuple[Set[ComponentNode], List[Tuple[str, str]]]:
Expand All @@ -1184,7 +1137,7 @@ def _collect_component_subgraph(
visited = set() # Track visited parameters to avoid cycles
edge_counter = [0] # Mutable counter for edge order tracking

def traverse(node: "Parameter", depth: int):
def traverse(node: "Parameter"):
if node in visited:
return
visited.add(node)
Expand All @@ -1211,14 +1164,25 @@ def traverse(node: "Parameter", depth: int):
pred.param_type == ParameterType.OUTPUT
or "OUTPUT" in pred.param_type.name
):
edges.append((pred_id, component_id, depth))
edges.append((pred_id, component_id, edge_counter[0]))
component_nodes.add(ComponentNode(id=pred_id, name=pred_name))
edge_counter[0] += 1

traverse(pred, depth + 1)
if pred.param_type == ParameterType.INPUT:
pred_id = pred.id
pred_name = pred.name
pred_node = ComponentNode(
id=pred_id, name=pred_name, type="INPUT"
)
component_nodes.add(pred_node)
# add an edge from input to the first output
edges.append((pred_id, component_id, edge_counter[0]))
edge_counter[0] += 1

traverse(pred)

# Start traversal from the current parameter
traverse(self, depth=0)
traverse(self)
# Reverse the edge order
# total_edges = len(edges)
# edges = [
Expand Down
21 changes: 18 additions & 3 deletions adalflow/adalflow/optim/trainer/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ class Trainer(Component):
batch_val_score_threshold: Optional[float] = (
1.0 # when acc_score >= this threshold, skip this batch
)
correct_val_score_threshold: Optional[float] = (
0.5 # when acc_score > this threshold, the sample is considered correct; otherwise it is an error sample
)
max_error_samples: Optional[int] = 2
max_correct_samples: Optional[int] = 2
debug: bool = False
Expand All @@ -106,6 +109,7 @@ def __init__(
num_workers: int = 4,
ckpt_path: str = None,
batch_val_score_threshold: Optional[float] = 1.0,
correct_val_score_threshold: Optional[float] = 0.5,
max_error_samples: Optional[int] = 2,
max_correct_samples: Optional[int] = 2,
max_proposals_per_step: int = 5,
Expand Down Expand Up @@ -140,6 +144,7 @@ def __init__(
self.val_dataset = val_dataset
self.test_dataset = test_dataset
self.batch_val_score_threshold = batch_val_score_threshold
self.correct_val_score_threshold = correct_val_score_threshold
self.max_error_samples = max_error_samples
self.max_correct_samples = max_correct_samples
self.max_proposals_per_step = max_proposals_per_step
Expand Down Expand Up @@ -1680,10 +1685,18 @@ def _moving_batch_sample(
# ensure every score in acc_score_list lies within [0, 1]
import numpy as np

if not all([score in [0, 1] for score in acc_score_list]):
if not all(0 <= score <= 1 for score in acc_score_list):
raise ValueError("acc_score_list should only contain 0 and 1")
correct_indices = [i for i, score in enumerate(acc_score_list) if score == 1]
error_indices = [i for i, score in enumerate(acc_score_list) if score == 0]
correct_indices = [
i
for i, score in enumerate(acc_score_list)
if score > self.correct_val_score_threshold
]
error_indices = [
i
for i, score in enumerate(acc_score_list)
if score <= self.correct_val_score_threshold
]
print(f"Moving batch correct size: {len(correct_indices)}")
print(f"Moving batch error size: {len(error_indices)}")
if len(error_indices) == 0:
Expand Down Expand Up @@ -1984,6 +1997,8 @@ def _fit_text_grad_constraint(
**step_result,
)

# reset the moving batch

all_samples, all_losses, all_y_preds = [], [], []

else:
Expand Down
2 changes: 1 addition & 1 deletion use_cases/classification/train_string_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from use_cases.classification.data import load_datasets, TRECExtendedData

from adalflow.eval.answer_match_acc import AnswerMatchAcc
from LightRAG.use_cases.config import (
from use_cases.config import (
gpt_3_model,
gpt_4o_model,
)
Expand Down
28 changes: 22 additions & 6 deletions use_cases/question_answering/bbh/object_count/train_new.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,20 +140,36 @@ def train(


if __name__ == "__main__":
import sys
import json

# make the strategy configurable in the script
import argparse

parser = argparse.ArgumentParser()

parser.add_argument("--strategy", type=str, default="random")
parser.add_argument(
"output_path", nargs="?", help="File path to save the checkpoint"
)

args = parser.parse_args()

set_strategy = args.strategy
set_output_path = args.output_path

ckpt = train(
debug=False,
max_steps=12,
strategy="constrained",
max_steps=2,
strategy=set_strategy,
exclude_input_fields_from_bootstrap_demos=True,
)
print(f"ckpt: {ckpt}")
# Save ckpt to a file passed as an argument
if len(sys.argv) > 1: # Check if a file path is provided
with open(sys.argv[1], "w") as f:
if set_output_path:
with open(set_output_path, "w") as f:
json.dump({"ckpt": ckpt}, f)
print(f"Checkpoint saved to {set_output_path}")
else:
print("No file path provided for saving the checkpoint.")

# train_diagnose(**gpt_3_model)
# train_diagnose_teacher(**gpt_4o_model) # 4omini works well as an optimizer too
Expand Down
88 changes: 84 additions & 4 deletions use_cases/text_grad_2.0_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import tempfile
import json

num_runs = 2
# List of experiments to run
object_count = "use_cases/question_answering/bbh/object_count/train_new.py"
hotpot_qa_multi_hop_rag = "benchmarks/hotpot_qa/adal_exp/train_multi_hop_rag.py"
Expand All @@ -14,7 +15,7 @@

# Optional: Arguments for each experiment (if needed)
experiment_args = {
object_count: "",
object_count: "--strategy random",
# hotpot_qa_multi_hop_rag: "",
}
ckpt_values = {}
Expand Down Expand Up @@ -49,10 +50,89 @@ def run_experiment(script, args):
def summarize_runs(run_ckpts):
    """Aggregate test-score statistics over the checkpoints of repeated runs.

    Args:
        run_ckpts: Mapping of a run label to the path of the JSON checkpoint
            file produced by that run. Each file must contain a
            ``"test_scores"`` list and may contain an ``"effective_measures"``
            dict with ``"subset"`` and ``"valset"`` entries, each holding
            ``"pass"`` and ``"fail"`` counts.

    Returns:
        A dict with the keys ``highest_test_score``, ``mean_test_score``,
        ``standard_deviation`` (population standard deviation of each run's
        best test score), ``highest_test_score_json_file``,
        ``average_pass_rate``, ``average_pass_prompts`` and
        ``average_total_prompts``; or ``None`` when no run contributed any
        test score, so callers never divide by zero.
    """
    best_scores = []
    pass_rates = []
    pass_counts = []
    prompt_totals = []
    highest_test_score = 0
    highest_test_score_json_file = None

    for label, ckpt in run_ckpts.items():
        with open(ckpt, "r") as f:
            data = json.load(f)
        print(f"Experiment: {label}")
        print(f"Data: {data}")

        test_scores = data["test_scores"]
        if not test_scores:
            # max() of an empty list would raise; there is nothing to summarize.
            print(f"No test scores recorded in {ckpt}; skipping this run.")
            continue
        # Bind the list first: reusing double quotes inside an f-string
        # replacement field is a SyntaxError before Python 3.12.
        print(f" test score: {test_scores}")

        best = max(test_scores)
        best_scores.append(best)
        # The `is None` check guarantees a file is reported even when every
        # best score equals the initial 0.
        if highest_test_score_json_file is None or best > highest_test_score:
            highest_test_score = best
            highest_test_score_json_file = ckpt

        effective_measures = data.get("effective_measures", {})
        if effective_measures:
            subset = effective_measures["subset"]
            valset = effective_measures["valset"]
            total_prompts = subset["pass"] + subset["fail"]
            pass_num = valset["pass"]
            rate_denominator = pass_num + valset["fail"]
        else:
            # Fallback when no measures were recorded: one entry is excluded
            # from both counts (presumably the starting prompt's score --
            # TODO confirm) and every additional distinct score counts as a
            # passing prompt.
            total_prompts = len(test_scores) - 1
            pass_num = len(set(test_scores)) - 1
            rate_denominator = total_prompts

        # A run with a single score (or an empty valset) has a zero denominator.
        pass_rates.append(pass_num / rate_denominator if rate_denominator else 0.0)
        pass_counts.append(pass_num)
        prompt_totals.append(total_prompts)

    if not best_scores:
        return None

    run_count = len(best_scores)
    mean_test_score = sum(best_scores) / run_count
    variance = sum((score - mean_test_score) ** 2 for score in best_scores) / run_count

    return {
        "highest_test_score": highest_test_score,
        "mean_test_score": mean_test_score,
        "standard_deviation": variance**0.5,
        "highest_test_score_json_file": highest_test_score_json_file,
        "average_pass_rate": sum(pass_rates) / run_count,
        "average_pass_prompts": sum(pass_counts) / run_count,
        "average_total_prompts": sum(prompt_totals) / run_count,
    }


if __name__ == "__main__":
    for experiment in experiments:
        args = experiment_args.get(experiment, "")

        # Checkpoints of this experiment only: the shared ckpt_values also
        # accumulates other experiments' paths and their summary dicts, which
        # must not be opened as files or mixed into this experiment's stats.
        run_ckpts = {}
        for i in range(num_runs):
            print(f"\nRun {i + 1}/{num_runs}")
            ckpt = run_experiment(experiment, args)
            if ckpt:
                run_ckpts[f"{experiment}_{i + 1}"] = ckpt
        ckpt_values.update(run_ckpts)

        summary = summarize_runs(run_ckpts)
        if summary is None:
            print(f"No checkpoints produced for {experiment}; skipping summary.")
        else:
            ckpt_values[f"{experiment}_summary"] = summary

    print("\nAll Checkpoints:")
    for label, value in ckpt_values.items():
        print(f"{label}: {value}")

    # Save the results to a file
    with open("results.json", "w") as f:
        json.dump(ckpt_values, f)

0 comments on commit 66390d6

Please sign in to comment.