
Add Bayesian optimization #29

Closed
wants to merge 2 commits into from
134 changes: 86 additions & 48 deletions arch_gym/envs/AstraSimEnv.py
@@ -8,6 +8,8 @@
import csv
import random

from envHelpers import helpers

settings_file_path = os.path.realpath(__file__)
settings_dir_path = os.path.dirname(settings_file_path)
proj_root_path = os.path.join(settings_dir_path, '..', '..')
@@ -16,18 +18,25 @@

# astra-sim environment
class AstraSimEnv(gym.Env):
def __init__(self, rl_form="random_walker", max_steps=5, num_agents=1, reward_formulation="None", reward_scaling=1):
# action space = set of all possible actions. Space.sample() returns a random action
self.action_space = gym.spaces.Discrete(16)
# observation space = set of all possible observations
self.observation_space = gym.spaces.Discrete(1)
def __init__(self, rl_form="sa1", max_steps=5, num_agents=1, reward_formulation="None", reward_scaling=1,):
self.rl_form = rl_form

if self.rl_form == 'sa1':
# action space = set of all possible actions. Space.sample() returns a random action
# observation space = set of all possible observations
self.observation_space = gym.spaces.Box(low=0, high=1, shape=(1,), dtype=np.float32) # Box is a continuous array-valued space with the given shape
self.action_space = gym.spaces.Box(low=0, high=1, shape=(4,), dtype=np.float32)
self.helpers = helpers()

else:
self.observation_space = gym.spaces.Box(low=0, high=1, shape=(1,), dtype=np.float32)
self.action_space = gym.spaces.Discrete(16)


# set parameters
self.max_steps = max_steps
self.counter = 0
self.useful_counter = 0

self.rl_form = rl_form
self.num_agents = num_agents
self.reward_formulation = reward_formulation
self.reward_scaling = reward_scaling
@@ -49,9 +58,15 @@ def __init__(self, rl_form="random_walker", max_steps=5, num_agents=1, reward_fo
self.networks_folder = os.path.join(sim_path, "astrasim-archgym/dse/archgen_v1_knobs/templates/network")
self.workloads_folder = os.path.join(sim_path, "astrasim-archgym/themis/inputs/workload")

# Config does not matter
self.network_config = os.path.join(self.networks_folder, "3d_fc_ring_switch.json")
self.workload_config = os.path.join(sim_path, "realworld_workloads/transformer_1t_fused_only_t.txt")
self.workload_config = os.path.join(self.workloads_folder, "all_reduce/allreduce_0.65.txt")
self.astrasim_archgym = os.path.join(sim_path, "astrasim-archgym")
self.systems_folder = os.path.join(self.astrasim_archgym, "themis/inputs/system")

self.network_file = "4d_ring_fc_ring_switch.json"
self.system_file = os.path.join(self.systems_folder, "4d_ring_fc_ring_switch_baseline.txt")
self.workload_file = "all_reduce/allreduce_0.65.txt"

print("_____________________*****************************_____________________")

@@ -60,6 +75,7 @@ def __init__(self, rl_form="random_walker", max_steps=5, num_agents=1, reward_fo
# reset function

def reset(self):

self.counter = 0
# get results folder path
results_folder_path = os.path.join(sim_path, "results", "run_general")
@@ -72,7 +88,13 @@ def reset(self):
csv_files = os.path.join(results_folder_path, csv_files)
if os.path.exists(csv_files):
os.remove(csv_files)
return

# TODO:
obs = np.zeros(self.observation_space.shape)

return obs



# parses a result csv file and stores it in a dictionary
def parse_result(self, file_name):
@@ -113,13 +135,45 @@ def calculate_reward(self, observations):
print(sum)
return 1 / (sum ** 0.5)
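
For intuition, a minimal sketch of the reward shape returned above, assuming `sum` aggregates the observation values (its accumulation is outside this hunk): the reward is the reciprocal square root, so a larger communication time yields a smaller reward.

    comms_time = 120.0                  # hypothetical CommsTime observation, in microseconds
    reward = 1 / (comms_time ** 0.5)    # ~0.091; shrinks as the communication time grows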


# parse system_file (above is the content) into dict
def parse_system(self, system_file, action_dict):
action_dict['system'] = {}
with open(system_file, 'r') as file:
lines = file.readlines()

for line in lines:
key, value = line.strip().split(': ')
action_dict['system'][key] = value
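
As a rough illustration of what parse_system produces (the keys and values below are assumptions based on the mappers in envHelpers, not the contents of the real baseline file), a system file of "key: value" lines becomes a nested dict under the 'system' key:

    # hypothetical system file, one "key: value" pair per line:
    #   scheduling-policy: FIFO
    #   inter-dimension-scheduling: baseline
    action_dict = {}
    env.parse_system(env.system_file, action_dict)
    # action_dict == {'system': {'scheduling-policy': 'FIFO',
    #                            'inter-dimension-scheduling': 'baseline'}}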

# give it one action: one set of parameters from json file
def step(self, action_dict):

# write the three config files
# with open(self.network_config, "w") as outfile:
# outfile.write(json.dumps(action_dict['network'], indent=4))
print(action_dict)
if not isinstance(action_dict, dict):
with open(settings_dir_path + "/AstraSimRL_2.csv", 'a') as f:
writer = csv.writer(f)
writer.writerow(action_dict)

print("STEP: action_dict is a list")
action_dict_decoded = {}
action_dict_decoded['network'] = {"path": self.network_file}
action_dict_decoded['workload'] = {"path": self.workload_file}

# parse system: initial values
self.parse_system(self.system_file, action_dict_decoded)

# decode the encoded action values into a nested parameter dict
action_decoded = self.helpers.action_decoder_ga_astraSim(action_dict)

# change all variables decoded into action_dict
for sect in action_decoded:
for key in action_decoded[sect]:
action_dict_decoded[sect][key] = action_decoded[sect][key]

action_dict = action_dict_decoded



if "path" in action_dict["network"]:
self.network_config = action_dict["network"]["path"]

@@ -135,12 +189,7 @@ def step(self, action_dict):

# the action is actually the parsed parameter files
print("Step: " + str(self.counter))
if (self.counter == self.max_steps):
self.done = True
print("Maximum steps reached")
self.reset()
else:
self.counter += 1
self.counter += 1

# start subprocess to run the simulation
# $1: network, $2: system, $3: workload
@@ -174,57 +223,46 @@ def step(self, action_dict):
sample_all_reduce_dimension_utilization = self.parse_result(sim_path +
'/results/run_general/sample_all_reduce_dimension_utilization.csv')

if (self.counter == self.max_steps):
self.done = True
print("Maximum steps reached")
self.reset()


# test if the csv files exist (if they don't, the config files are invalid)
if ((len(backend_dim_info) == 0 or len(backend_end_to_end) == 0 or
len(detailed) == 0 or len(end_to_end) == 0 or
len(sample_all_reduce_dimension_utilization) == 0)):
# set reward to be extremely negative
reward = float("-inf")
print("reward: ", reward)
return [[], reward, self.done, {"useful_counter": self.useful_counter}, self.state]
return [], reward, self.done, {"useful_counter": self.useful_counter}, self.state
else:
# only recording the first line because apparently they are all the same? TODO
self.observations = [
backend_end_to_end["CommsTime"][0],
observations = [
float(backend_end_to_end["CommsTime"][0])
# end_to_end["fwd compute"][0],
# end_to_end["wg compute"][0],
# end_to_end["ig compute"][0],
# end_to_end["total exposed comm"][0]
]
reward = self.calculate_reward(self.observations)
print("reward: ", reward)
print("observations: ", self.observations)


reward = self.calculate_reward(observations)

print("reward: ", reward)

# reshape observations with shape of observation space
observations = np.reshape(observations, self.observation_space.shape)
self.useful_counter += 1

return [self.observations, reward, self.done, {"useful_counter": self.useful_counter}, self.state]
return observations, reward, self.done, {"useful_counter": self.useful_counter}, self.state


if __name__ == "__main__":
print("Testing AstraSimEnv")
env = AstraSimEnv(rl_form='random_walker',
env = AstraSimEnv(rl_form='sa1',
max_steps=10,
num_agents=1,
reward_formulation='reward_formulation_1',
reward_scaling=1)






"""
Every time reset happens:
- zero out the observation

3/24:
Communication Time (unit: microseconds)
Time breakdowns (forward pass, weight gradient, input gradient)
Exposed communication


3/31:
Catch errors by giving it high negative reward. This way we can test the range.


"""
20 changes: 11 additions & 9 deletions arch_gym/envs/AstraSimWrapper.py
@@ -13,7 +13,7 @@
# limitations under the License.

"""Wraps an OpenAI Gym environment to be used as a dm_env environment."""
import sys, os
import sys
from typing import Any, Dict, List, Optional

from acme import specs
@@ -25,8 +25,8 @@
import numpy as np
import tree

os.sys.path.insert(0, os.path.abspath('../../'))
from arch_gym.envs.AstraSimEnv import AstraSimEnv
from AstraSimEnv import AstraSimEnv
from envHelpers import helpers

# dm = deepmind
class AstraSimEnvWrapper(dm_env.Environment):
Expand All @@ -41,6 +41,7 @@ def __init__(self, environment: gym.Env,
self._environment = environment
self._reset_next_step = True
self._last_info = None
self.helper = helpers()
self.env_wrapper_sel = env_wrapper_sel

# set useful counter
@@ -182,12 +183,12 @@ def _convert_to_spec(space: gym.Space,
else:
raise ValueError('Unexpected gym space: {}'.format(space))

def make_astraSim_env(seed: int = 12345,
rl_form = 'macme',
def make_astraSim_env(seed: int = 12234,
rl_form = 'sa1',
reward_formulation = 'power',
reward_scaling = 'false',
max_steps: int = 100,
num_agents: int = 10) -> dm_env.Environment:
max_steps: int = 1,
num_agents: int = 1) -> dm_env.Environment:
"""Returns DRAMSys environment."""
print("[DEBUG][Seed]", seed)
print("[DEBUG][RL Form]", rl_form)
@@ -205,7 +206,8 @@ def make_astraSim_env(seed: int = 12345,
),
env_wrapper_sel = rl_form
)

environment = wrappers.SinglePrecisionWrapper(environment)
if(rl_form == 'sa' or rl_form == 'tdm'):
environment = wrappers.CanonicalSpecWrapper(environment, clip=True)
if(rl_form == 'sa1' or rl_form == 'tdm'):
environment = wrappers.CanonicalSpecWrapper(environment, clip=False)
return environment
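
A small sketch of constructing the wrapped environment, assuming the acme/dm_env stack is installed; generate_value() just produces a placeholder action that conforms to the environment's action spec.

    env = make_astraSim_env(rl_form='sa1', max_steps=1)
    timestep = env.reset()                        # dm_env TimeStep with a float32 observation
    action = env.action_spec().generate_value()   # placeholder action matching the spec
    timestep = env.step(action)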
8 changes: 4 additions & 4 deletions arch_gym/envs/envHelpers.py
@@ -797,10 +797,10 @@ def action_decoder_ga_astraSim(self, act_encoded):
interDimension_mapper = {0: "baseline", 1: "themis"}

# Modified system parameters
act_decoded["system"]["scheduling-policy"] = schedulePolicy_mapper[int(act_encoded[0])]
act_decoded["system"]["collective-optimization"] = collectiveOptimization_mapper[int(act_encoded[1])]
act_decoded["system"]["intra-dimension-scheduling"] = intraDimension_mapper[int(act_encoded[2])]
act_decoded["system"]["inter-dimension-scheduling"] = interDimension_mapper[int(act_encoded[3])]
act_decoded["system"]["scheduling-policy"] = schedulePolicy_mapper[int(round(act_encoded[0]))]
act_decoded["system"]["collective-optimization"] = collectiveOptimization_mapper[int(round(act_encoded[1]))]
act_decoded["system"]["intra-dimension-scheduling"] = intraDimension_mapper[int(round(act_encoded[2]))]
act_decoded["system"]["inter-dimension-scheduling"] = interDimension_mapper[int(round(act_encoded[3]))]

return act_decoded
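
The switch from int() to int(round()) matters because the sa1 action space is continuous in [0, 1], so plain truncation would map almost every value to index 0. A tiny illustration:

    act = 0.83          # e.g. one entry of the Box(4,) action vector
    int(act)            # -> 0: truncation always picks the first option
    int(round(act))     # -> 1: nearest choice, e.g. "themis" for inter-dimension-scheduling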

5 changes: 5 additions & 0 deletions sims/AstraSim/AstraSimRL.csv
@@ -0,0 +1,5 @@
0.17908713,0.44089648,0.83359694,0.2673431
0.5,0.5,0.5,0.5
0.0,0.0,0.0,1.0
0.0,0.0,0.0,1.0
0.17908713,0.44089648,0.83359694,0.2673431
1 change: 1 addition & 0 deletions sims/AstraSim/astrasim-archgym
Submodule astrasim-archgym added at 2ff6b7
Binary file added sims/AstraSim/bo_logs/metadata.riegeli
Binary file not shown.
7 changes: 7 additions & 0 deletions sims/AstraSim/exp_config.ini
@@ -0,0 +1,7 @@
[experiment_configuration]
exp_name = resnet18_random_state_2_num_iter_16
trajectory_dir = ./bo_trajectories/power/resnet18_random_state_2_num_iter_16
log_dir = ./bo_logs/power/resnet18_random_state_2_num_iter_16
reward_formulation = power
use_envlogger = True
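
For reference, the experiment configuration can be read back with Python's standard-library configparser; a sketch only, not necessarily how the training scripts consume it.

    import configparser
    cfg = configparser.ConfigParser()
    cfg.read("sims/AstraSim/exp_config.ini")
    exp = cfg["experiment_configuration"]
    print(exp["exp_name"], exp.getboolean("use_envlogger"))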

20 changes: 20 additions & 0 deletions sims/AstraSim/general_workload.txt
@@ -0,0 +1,20 @@
HYBRID_TRANSFORMER_FWD_IN_BCKWD model_parallel_NPU_group: 128 checkpoints: 2 0 9 checkpoint_initiates: 2 17 8
18
Q1 -1 2343750 NONE 0 2343750 ALLREDUCE 805306368 2343750 ALLREDUCE 240316416 10
K1 -1 2343750 NONE 0 2343750 NONE 0 2343750 NONE 0 10
V1 -1 2343750 NONE 0 2343750 NONE 0 2343750 NONE 0 10
QK1 -1 97656 NONE 0 97656 NONE 0 97656 NONE 0 10
softmax1 -1 97656 NONE 0 97656 NONE 0 97656 NONE 0 10
concat1 -1 2343750 ALLREDUCE 805306368 2343750 ALLGATHER 6291456 2343750 NONE 0 10
X1W1b1 -1 9375000 NONE 0 9375000 ALLREDUCE 805306368 9375000 NONE 0 10
X1W2b2 -1 9375000 ALLREDUCE 805306368 9375000 NONE 0 9375000 NONE 0 10
layerNorm1 -1 12207 NONE 0 12207 NONE 0 12207 NONE 0 10
Q2 -1 2343750 NONE 0 2343750 ALLREDUCE 805306368 2343750 NONE 0 10
K2 -1 2343750 NONE 0 2343750 NONE 0 2343750 NONE 0 10
V2 -1 2343750 NONE 0 2343750 NONE 0 2343750 NONE 0 10
QK2 -1 97656 NONE 0 97656 NONE 0 97656 NONE 0 10
softmax2 -1 97656 NONE 0 97656 NONE 0 97656 NONE 0 10
concat2 -1 2343750 ALLREDUCE 805306368 2343750 ALLGATHER 6291456 2343750 NONE 0 10
X2W1b1 -1 9375000 NONE 0 9375000 ALLREDUCE 805306368 9375000 NONE 0 10
X2W2b2 -1 9375000 ALLREDUCE 805306368 9375000 NONE 0 9375000 NONE 0 10
layerNorm2 -1 12207 NONE 0 12207 NONE 0 12207 NONE 0 10