ShuxiangCao · ShuxiangCao · Dec 9, 2024 · Dec 8, 2024 · Dec 8, 2024 · Dec 9, 2024
diff --git a/benchmark/exp_recall/embedding_search_benchmarking.py b/benchmark/exp_recall/embedding_search_benchmarking.py
@@ -165,22 +165,24 @@ def check_code(codes, exp_class):
 
 
 from leeq.utils.ai.translation_agent import init_leeq_translation_agents
-from k_agents.translation.agent import TranslationAgentGroup, get_codegen_wm
+from k_agents.translation.agent import TranslationAgentGroup, get_codegen_wm, CodegenAgent
 from k_agents.variable_table import VariableTable
 
 class TransmonElementFake:
     def __repr__(self):
         return "TransmonElement"
-def benchmark_single(key, exp_class, description, code_cog_model):
+
+
+def benchmark_single(key, exp_class, description, code_gen_model):
     input_var_table = VariableTable()
     input_var_table.add_variable("dut", TransmonElementFake(), "device under test")
     print("Description:", description)
     codegen_wm = get_codegen_wm(description, input_var_table)
-    recall_res = code_cog_model.recall(codegen_wm)
+    recall_res = code_gen_model.recall(codegen_wm)
 
     additional_info = []
 
-    codes = code_cog_model.codegen(codegen_wm, recall_res)
+    codes = code_gen_model.codegen(codegen_wm, recall_res)
     try:
         success = check_code(codes, exp_class)
     except Exception as e:
@@ -198,12 +200,14 @@ def benchmark_all(rag, n_recall_items):
     env = TranslationAgentEnv()
     translation_agents = env.translation_agents
     if rag:
-        code_cog_model = TranslationAgentGroupRAG()
+        code_gen_model = TranslationAgentGroupRAG()
     else:
-        code_cog_model = TranslationAgentGroup()
-    code_cog_model.n_recall_items = n_recall_items
-    for idea in translation_agents.translation_agents:
-        code_cog_model.translation_agents.add_agent(idea)
+        code_gen_model = TranslationAgentGroup()
+        code_gen_model.codegen_agent = CodegenAgent()
+    code_gen_model.n_recall_items = n_recall_items
+
+    for agent in translation_agents.translation_agents.agents:
+        code_gen_model.translation_agents.add_agent(agent)
 
     results_list = {}
 
@@ -213,7 +217,7 @@ def benchmark_one_experiment(_exp_name):
         exp_prompts = experiment_prompt[_exp_name][1]
         def benchmark_one_prompt(_prompt):
             try:
-                success, additional_info = benchmark_single(_exp_name, exp_class, _prompt, code_cog_model)
+                success, additional_info = benchmark_single(_exp_name, exp_class, _prompt, code_gen_model)
             except Exception as e:
                 success = False
                 additional_info = str(e)
@@ -224,7 +228,6 @@ def benchmark_one_prompt(_prompt):
             print(success, additional_info)
             results.append((prompt, success, additional_info))
         return results
-    #t = ["RB1Q"]
     t = list(experiment_prompt.keys())
     for exp_name, res in p_map(benchmark_one_experiment, t, n_workers=4):
         results_list[exp_name] = res
@@ -267,6 +270,7 @@ def main(model, shots, rag, n_recall_items):
             }
             results[i] = result
         except Exception as e:
+            print(str(e))
             result = {
                 'status': 'error',
                 'error': str(e)
@@ -281,18 +285,19 @@ def entry(model, rag):
     from mllm.config import default_options
     default_parallel_map_config["n_workers"] = 3
     default_options.timeout = 120
+    default_options.temperature = 0.2
 
     # You have to enable this option before using the `correct_json_by_model` rule
     parse_options.correct_json_by_model = True
     n_recall_items = 2
-    shots = 3
+    shots = 4
     main(model, shots, rag, n_recall_items)
 
 
 if __name__ == '__main__':
     models = [
-        "gpt-4o-2024-08-06",
         "gpt-4o-mini",
+        "gpt-4o-2024-08-06",
         "replicate/meta/meta-llama-3-70b-instruct",
         "claude-3-opus-20240229",
         "gemini/gemini-1.5-pro-latest",

diff --git a/leeq/experiments/builtin/multi_qubit_gates/conditional_stark_ai.py b/leeq/experiments/builtin/multi_qubit_gates/conditional_stark_ai.py
@@ -2593,11 +2593,14 @@ def _run_next_experiment(self, run_class, params, filter_parameters=True):
 
 
 class ConditionalStarkTwoQubitGateAmplitudeAdvise(Experiment):
+
+    n_points_to_try = 2 #5
     _rewrite_json_requirement = True
 
     _experiment_result_analysis_instructions = """
     Output a JSON dict with the following keys:
-    "success" (bool): true 
+    "exp_continue" (bool): whether exp_continue is true
+    "success" (bool): whether exp_continue is true
     "best_amplitude" (float): The best amplitude found in a successful experiment.
     "advised_amplitude" (float): The next amplitude to try.
     """
@@ -2619,8 +2622,7 @@ def run(self, duts: List[TransmonElement], frequency: float):
     @text_inspection
     def next_parameter(self):
         prompt = f"""
-        Your objective is to find the optimal parameters for the conditional stark-shift gate that will allow you to entangle 
-        two qubits. The parameters you need to find are 
+        Your objective is to find the optimal parameters for the conditional stark-shift gate that will allow you to entangle two qubits. The parameters you need to find are 
         <parameters>
         'amp_control':  the amplitude of the control qubit (The first qubit), the required amplitude accuracy is 0.01. 
         </parameters>
@@ -2659,14 +2661,21 @@ def next_parameter(self):
                     "You are a very smart and helpful assistant who only reply in JSON dict. Keep everything in a same line in the response.", dedent=True)
         res = chat.complete(parse="dict", expensive=True, cache=True)
 
+        tuning_env = TwoQubitTuningEnv()
+        results = tuning_env.amplitude_tuning_results.get(self.frequency, [])
+        n_points_tried = len(results)
+        if n_points_tried >= self.n_points_to_try:
+            res["exp_continue"] = False
+        else:
+            res["exp_continue"] = True
         return res
 
     @text_inspection
     def best_amplitude(self):
         tuning_env = TwoQubitTuningEnv()
         if self.frequency not in tuning_env.amplitude_tuning_results:
             return {
-                "best_amp": 'There is no successful experiment yet.'
+                "best_amp": 'There is no successful experiment yet.',
             }
         results = tuning_env.amplitude_tuning_results[self.frequency]
         amps = []
@@ -2676,11 +2685,11 @@ def best_amplitude(self):
         # the largest amp
         if len(amps) == 0:
             return {
-                "best_amp": 'There is no successful experiment yet.'
+                "best_amp": 'There is no successful experiment yet.',
             }
         best_amp = max(amps)
         return {
-            "best_amp": best_amp
+            "best_amp": best_amp,
         }
 
     def _experiment_history_to_prompt(self):
@@ -2766,11 +2775,14 @@ def run(
 
 
 class ConditionalStarkTwoQubitGateFrequencyAdvise(Experiment):
+    n_points_to_try = 2 #15
+
     _rewrite_json_requirement = True
 
     _experiment_result_analysis_instructions = """
     Output a JSON dict with the following keys:
-    "success" (bool): true
+    "success" (bool): if exp_continue is true
+    "exp_continue" (bool): if exp_continue is true
     "best_frequency" (float): The best frequency found in a successful experiment.
     "advised_frequency" (float): The next frequency to try.
     """
@@ -2827,7 +2839,7 @@ def next_frequency(self):
         "analysis" (str): An analysis of the current situation.
         "finished" (bool): whether the experiment is finished.
         "current_best" (float): The highest control frequency from a succeeded experiment. The value can be None if no experiment is successful.
-        "new_frequency_to_try" (float): The new frequency of the control qubit to try. If the experiment is finished, set this to the optimal amplitude.
+        "new_frequency_to_try" (float): The new frequency of the control qubit to try. If the experiment is finished, set this to the optimal amplitude,
         </format>
         <requirement>
         """
@@ -2836,6 +2848,12 @@ def next_frequency(self):
                     "You are a very smart and helpful assistant who only reply in JSON dict. Keep everything in a same line in the response.", dedent=True)
         res = chat.complete(parse="dict", expensive=True, cache=True)
 
+        tuning_env = TwoQubitTuningEnv()
+        n_points_tried = len(tuning_env.frequency_to_good_amplitude.items())
+        if n_points_tried >= self.n_points_to_try:
+            res["exp_continue"] = False
+        else:
+            res["exp_continue"] = True
         return res
 
     @text_inspection
@@ -2852,7 +2870,7 @@ def best_frequency(self):
         if best_freq is not None:
             return {
                 "best_freq": best_freq,
-                "best_amp": best_amp
+                "best_amp": best_amp,
             }
         else:
             return {

diff --git a/leeq/experiments/procedures/two_qubit_calibration.md b/leeq/experiments/procedures/two_qubit_calibration.md
@@ -2,16 +2,16 @@
 ## Background
 This procedure should not be used when only calibrate a specific aspect of the qubit, such as frequency or amplitude.
 ## Steps
-- Run ConditionalStarkTwoQubitGateFrequencyAdvise with `duts`. Proceed to the next stage whatever the result is.
-- Do an Iterative two-qubit amplitude test at the advised frequency on `duts`. If failed, go back the Stage 1. If failed 5 times, proceed to the Fail stage. If succeeded, proceed to Complete.
+- Run ConditionalStarkTwoQubitGateFrequencyAdvise with `duts`. Proceed to the next stage if exp_continue. If not exp_continue, go to the Complete stage if there is a best frequency, and go to the Failed stage if there is not.
+- Do an Iterative two-qubit amplitude test at the advised frequency on `duts`. Go back to Stage 1 whatever the result is.
 
 
 
 # Iterative Two-qubit Amplitude test at `frequency` on `duts`
 ## Background
 This experiment searches for the optimal amplitude for the two-qubit gate at `frequency` on `duts`. This is not a single step experiment, but an iterative one.
 ## Steps
-- Run ConditionalStarkTwoQubitGateAmplitudeAdvise with `frequency` and `duts`. Proceed to the next stage whatever the result is.
-- Run ConditionalStarkTwoQubitGateAmplitudeAttempt with duts=`duts`, frequency=`frequency`, amplitude = new_amplitude_to_try. If failed, go back the Stage 1. If failed 5 times, proceed to the Fail stage. If succeeded, proceed to Complete.
+- Run ConditionalStarkTwoQubitGateAmplitudeAdvise with `frequency` and `duts`. Proceed to the next stage if exp_continue. If not exp_continue, go to the Complete stage if there is a best amplitude, and go to the Failed stage if there is not.
+- Run ConditionalStarkTwoQubitGateAmplitudeAttempt with duts=`duts`, frequency=`frequency`, amplitude = new_amplitude_to_try. Go back to Stage 1 whatever the result is.
 ## Results
 Whether there is a success experiment or not. If so, what is the amplitude.
diff --git a/notebooks/Agent/SingleQubitTuneUp.ipynb b/notebooks/Agent/SingleQubitTuneUp.ipynb
@@ -0,0 +1,74 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "46ed3b2a-adb8-4455-b96e-7f68696f672e",
+   "metadata": {
+    "scrolled": true
+   },
+   "source": [
+    "# Automated calibration for single qubit\n",
+    "## Load virtual qubit"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "id": "6bc05f9e-e8b3-4aa8-8382-9d6c202230a8",
+   "metadata": {},
+   "source": [
+    "from simulated_setup import *\n",
+    "from leeq.experiments.builtin import *\n",
+    "\n",
+    "qubit = simulation_setup()"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "markdown",
+   "id": "45f9aec7a2c7ecfc",
+   "metadata": {},
+   "source": "## Run calibration by agents"
+  },
+  {
+   "cell_type": "code",
+   "id": "b4d4f2c9-ad83-461c-ba24-a33aae217cde",
+   "metadata": {},
+   "source": [
+    "from k_agents.execution.agent import execute_procedure\n",
+    "from leeq.utils.ai.translation_agent import init_leeq_translation_agents\n",
+    "\n",
+    "from mllm.config import default_models\n",
+    "default_models.normal = \"gpt-4o\"\n",
+    "default_models.expensive = \"gpt-4o\"\n",
+    "setup().status().set_param(\"AIAutoInspectPlots\", True)     \n",
+    "\n",
+    "init_leeq_translation_agents()\n",
+    "execute_procedure(\"Full gate calibration on `dut`\", dut=qubit)"
+   ],
+   "outputs": [],
+   "execution_count": null
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.15"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/Agent/TwoQubitTuneUp.ipynb b/notebooks/Agent/TwoQubitTuneUp.ipynb
@@ -0,0 +1,75 @@
+{
+ "cells": [
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": [
+    "# Automated calibration for two-qubit gate\n",
+    "## Load virtual qubits"
+   ],
+   "id": "66bd3e2100e94e47"
+  },
+  {
+   "cell_type": "code",
+   "id": "initial_id",
+   "metadata": {},
+   "source": [
+    "from simulated_setup_2 import *\n",
+    "from leeq import ExperimentManager\n",
+    "from labchronicle import Chronicle\n",
+    "Chronicle().start_log()\n",
+    "qubit_1, qubit_2 = get_virtual_qubit_pair()\n",
+    "ExperimentManager().status().set_param(\"Plot_Result_In_Jupyter\", True)"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "## Run calibration by agents",
+   "id": "7440cf399d01c3e4"
+  },
+  {
+   "cell_type": "code",
+   "id": "fed6c78a29e55398",
+   "metadata": {},
+   "source": [
+    "from k_agents.execution.agent import execute_procedure\n",
+    "from leeq.utils.ai.translation_agent import init_leeq_translation_agents\n",
+    "from mllm import config\n",
+    "\n",
+    "config.default_models.normal = \"gpt-4o\"\n",
+    "config.default_models.expensive = \"gpt-4o\"\n",
+    "config.default_options.temperature = 0.2\n",
+    "\n",
+    "init_leeq_translation_agents()\n",
+    "\n",
+    "execute_procedure(\"Two level Two-qubit calibration on `duts`\", duts=[qubit_1, qubit_2])"
+   ],
+   "outputs": [],
+   "execution_count": null
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.15"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}