
Commit 820a5b9: Merge branch 'ewilk0-pr'
chrisyeh96 committed May 13, 2024
2 parents 23ab5e5 + 7346660
Showing 6 changed files with 559 additions and 153 deletions.
6 changes: 1 addition & 5 deletions sustaingym/envs/building/__init__.py
@@ -2,8 +2,4 @@
from .multiagent_env import MultiAgentBuildingEnv
from .utils import ParameterGenerator

__all__ = [
'BuildingEnv',
'MultiAgentBuildingEnv',
'ParameterGenerator'
]
__all__ = ["BuildingEnv", "MultiAgentBuildingEnv", "ParameterGenerator"]
140 changes: 85 additions & 55 deletions sustaingym/envs/building/env.py
100644 → 100755
@@ -1,6 +1,7 @@
"""
The module implements the BuildingEnv class.
"""

from __future__ import annotations

from typing import Any
@@ -80,6 +81,7 @@ class BuildingEnv(gym.Env):
timestep: current timestep in episode, from 0 to 288
action_space: structure of actions expected by environment
"""

# Occupancy nonlinear coefficients, collected from page 1299 of
# https://energyplus.net/assets/nrel_custom/pdfs/pdfs_v23.1.0/EngineeringReference.pdf
OCCU_COEF = [
@@ -120,20 +122,21 @@ def __init__(self, parameters: dict[str, Any]):
"""
self.parameters = parameters

self.n = parameters['n']
self.zones = parameters['zones']
self.target = parameters['target']
self.out_temp = parameters['out_temp']
self.ground_temp = parameters['ground_temp']
self.ghi = parameters['ghi']
self.metabolism = parameters['metabolism']
self.ac_map = parameters['ac_map']
self.maxpower = parameters['max_power']
self.temp_range = parameters['temp_range']
self.reward_pnorm = parameters['reward_pnorm']
self.is_continuous_action = parameters['is_continuous_action']
self.timestep = parameters['time_resolution']
self.Occupower = 0
self.n = parameters["n"]
self.zones = parameters["zones"]
self.target = parameters["target"]
self.out_temp = parameters["out_temp"]
self.ground_temp = parameters["ground_temp"]
self.ghi = parameters["ghi"]
self.metabolism = parameters["metabolism"]
self.ac_map = parameters["ac_map"]
self.maxpower = parameters["max_power"]
self.temp_range = parameters["temp_range"]
self.reward_pnorm = parameters["reward_pnorm"]
self.is_continuous_action = parameters["is_continuous_action"]
self.timestep = parameters["time_resolution"]
self.episode_len = parameters["episode_len"]
self.Occupower = 0.
self.datadriven = False
self.length_of_weather = len(self.out_temp)

@@ -146,25 +149,30 @@ def __init__(self, parameters: dict[str, Any]):
self.action_space = gym.spaces.Box(self.Qlow, self.Qhigh, dtype=np.float32)
else:
self.action_space = gym.spaces.MultiDiscrete(
(self.Qhigh * self.DISCRETE_LENGTH
- self.Qlow * self.DISCRETE_LENGTH).astype(np.int64)
(
self.Qhigh * self.DISCRETE_LENGTH - self.Qlow * self.DISCRETE_LENGTH
).astype(np.int64)
)

# Set the observation space bounds based on the minimum and maximum temperature
min_T, max_T = self.temp_range
heat_max = 1000
self.low = np.concatenate([
np.ones(self.n + 1) * min_T, # temp of zones and outdoor
[0], # GHI
[min_T], # temp of ground
[-min_T * self.OCCU_COEF_LINEAR / 1000] # occupancy power
]).astype(np.float32)
self.high = np.concatenate([
np.ones(self.n + 1) * max_T, # temp of zones and outdoor
[heat_max], # GHI
[max_T], # temp of ground
[heat_max] # occupancy power
]).astype(np.float32)
self.low = np.concatenate(
[
np.ones(self.n + 1) * min_T, # temp of zones and outdoor
[0], # GHI
[min_T], # temp of ground
[-min_T * self.OCCU_COEF_LINEAR / 1000], # occupancy power
]
).astype(np.float32)
self.high = np.concatenate(
[
np.ones(self.n + 1) * max_T, # temp of zones and outdoor
[heat_max], # GHI
[max_T], # temp of ground
[heat_max], # occupancy power
]
).astype(np.float32)
self.observation_space = gym.spaces.Box(self.low, self.high, dtype=np.float32)

# Set the weight for the power consumption and comfort range
@@ -184,17 +192,18 @@ def __init__(self, parameters: dict[str, Any]):
self.X_new = self.target

# Stack B and D matrix together for easy calculation
A = parameters['A']
B = parameters['B']
D = parameters['D']
A = parameters["A"]
B = parameters["B"]
D = parameters["D"]
BD = np.hstack((D[:, np.newaxis], B))

# Compute the discrete-time system matrices
self.A_d = expm(A * self.timestep)
self.BD_d = LA.inv(A) @ (self.A_d - np.eye(self.A_d.shape[0])) @ BD

def step(self, action: np.ndarray
) -> tuple[np.ndarray, float, bool, bool, dict[str, Any]]:
def step(
self, action: np.ndarray
) -> tuple[np.ndarray, float, bool, bool, dict[str, Any]]:
"""Steps the environment.
Updates the state of the environment based on the given action and calculates the
@@ -231,12 +240,12 @@ def step(self, action: np.ndarray
done = False

# Prepare the input matrices X and Y
X = self.state[:self.n].T
X = self.state[: self.n].T
Y = np.insert(
np.append(action, self.ghi[self.epoch]), 0, self.out_temp[self.epoch]
).T
Y = np.insert(Y, 0, self.ground_temp[self.epoch]).T
avg_temp = np.sum(self.state[:self.n]) / self.n
avg_temp = np.sum(self.state[: self.n]) / self.n
meta = self.metabolism[self.epoch]

# If the environment is data-driven, add additional features to the Y matrix
@@ -273,13 +282,17 @@ def step(self, action: np.ndarray
info = self._get_info()

# self.statelist.append(self.state)
self.state = np.concatenate([
X_new,
[self.out_temp[self.epoch],
self.ground_temp[self.epoch],
self.ghi[self.epoch],
self.Occupower / 1000],
]).astype(np.float32)
self.state = np.concatenate(
[
X_new,
[
self.out_temp[self.epoch],
self.ground_temp[self.epoch],
self.ghi[self.epoch],
self.Occupower / 1000,
],
]
).astype(np.float32)

# Store the action in the actionlist
self.actionlist.append(action * self.maxpower)
@@ -288,24 +301,32 @@ def step(self, action: np.ndarray
self.epoch += 1

# Check if the environment has reached the end of the weather data
if (
self.epoch % (self.length_of_weather // self.episode_len) == 0
and self.epoch != 0
):
done = True
if self.epoch >= self.length_of_weather - 1:
done = True
self.epoch = 0

# Return the new state, reward, done flag, and info
return self.state, reward, done, done, info
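With the episode handling above, a rollout now terminates after episode_len steps rather than only at the end of the weather data. A minimal interaction loop (hypothetical setup; the actual ParameterGenerator arguments may differ):

from sustaingym.envs.building import BuildingEnv, ParameterGenerator

params = ParameterGenerator()   # hypothetical call, for illustration only
env = BuildingEnv(params)

obs, info = env.reset(seed=0)
terminated = truncated = False
total_reward = 0.0
while not (terminated or truncated):
    action = env.action_space.sample()   # random controller, for illustration only
    obs, reward, terminated, truncated, info = env.step(action)
    total_reward += reward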

def reset(self, *, seed: int | None = None, options: dict | None = None
) -> tuple[np.ndarray, dict[str, Any]]:
def reset(
self, *, seed: int | None = None, options: dict | None = None
) -> tuple[np.ndarray, dict[str, Any]]:
"""Resets the environment.
Prepares the environment for the next episode by setting the initial
temperatures, average temperature, occupancy, and occupower. The initial state
is constructed by concatenating these variables.
Args:
seed: seed for resetting the environment. An episode is entirely
reproducible no matter the generator used.
seed: seed for resetting the environment. The seed determines which episode
to start at. Increment the seed sequentially to experience episodes
in chronological order. Set seed to None for a random episode.
An episode is entirely reproducible no matter the generator used.
options: optional resetting options
- 'T_initial': np.ndarray, shape [n], initial temperature of each zone
@@ -317,7 +338,12 @@ def reset(self, *, seed: int | None = None, options: dict | None = None
super().reset(seed=seed, options=options)

# Initialize the episode counter
self.epoch = 0
num_episodes = self.length_of_weather // self.episode_len
if seed is None:
episode = self.np_random.integers(low=0, high=num_episodes)
else:
episode = seed % num_episodes
self.epoch = episode * self.episode_len
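The seed now selects which slice of the weather data the episode covers: episode = seed % num_episodes and epoch = episode * episode_len, so consecutive seeds visit episodes in chronological order, and seed=None picks one at random. A sketch of the resulting behavior (hypothetical numbers, continuing the setup above):

from sustaingym.envs.building import BuildingEnv, ParameterGenerator

env = BuildingEnv(ParameterGenerator())   # hypothetical construction, as above
# Suppose length_of_weather = 105120 five-minute steps and episode_len = 288 (one day),
# so num_episodes = 105120 // 288 = 365. Then:
obs_day1, _ = env.reset(seed=0)    # epoch 0   (first day of the weather data)
obs_day2, _ = env.reset(seed=1)    # epoch 288 (second day)
obs_wrap, _ = env.reset(seed=365)  # 365 % 365 == 0, so epoch 0 again
obs_rand, _ = env.reset()          # seed=None -> a uniformly random day via self.np_random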

# Initialize state and action lists
self.statelist = []
@@ -339,13 +365,17 @@ def reset(self, *, seed: int | None = None, options: dict | None = None

# Construct the initial state by concatenating relevant variables
self.X_new = T_initial
self.state = np.concatenate([
T_initial,
[self.out_temp[self.epoch],
self.ground_temp[self.epoch],
self.ghi[self.epoch],
self.Occupower / 1000]
]).astype(np.float32)
self.state = np.concatenate(
[
T_initial,
[
self.out_temp[self.epoch],
self.ground_temp[self.epoch],
self.ghi[self.epoch],
self.Occupower / 1000,
],
]
).astype(np.float32)

# Initialize the rewards
self.flag = 1
@@ -454,7 +484,7 @@ def train(self, states: np.ndarray, actions: np.ndarray) -> None:

# Update the A_d and B_d matrices with the coefficients from the fitted model
self.A_d = beta[:, : self.n]
self.BD_d = beta[:, self.n:]
self.BD_d = beta[:, self.n :]

# Set the data-driven flag to True
self.datadriven = True
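The train() change above replaces the physics-derived A_d and BD_d with coefficients of a model fitted to logged states and actions. The exact feature construction is not shown in this diff, but a least-squares fit of that general form might look like the following sketch (illustrative only, not the repository's implementation):

import numpy as np

def fit_linear_dynamics(X: np.ndarray, U: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """Fit X[t+1] ~= A_d @ X[t] + BD_d @ U[t] by ordinary least squares.

    X: shape [T, n] zone temperatures; U: shape [T, m] exogenous inputs and actions.
    """
    features = np.hstack([X[:-1], U[:-1]])   # [T-1, n+m]
    targets = X[1:]                          # [T-1, n]
    beta, *_ = np.linalg.lstsq(features, targets, rcond=None)
    beta = beta.T                            # [n, n+m], rows map features to the next state
    n = X.shape[1]
    return beta[:, :n], beta[:, n:]          # A_d, BD_d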
22 changes: 13 additions & 9 deletions sustaingym/envs/building/multiagent_env.py
@@ -1,6 +1,7 @@
"""
The module implements a multi-agent version of the building environment.
"""

from __future__ import annotations

from collections.abc import Mapping
@@ -54,25 +55,27 @@ def __init__(self, parameters: dict[str, Any]) -> None:
self.agents = self.possible_agents[:]

self.observation_spaces = {
agent: self.single_env.observation_space
for agent in self.agents
agent: self.single_env.observation_space for agent in self.agents
}

if self.single_env.is_continuous_action:
self.action_spaces = {
agent: spaces.Box(-1., 1., shape=(1,), dtype=np.float32)
agent: spaces.Box(-1.0, 1.0, shape=(1,), dtype=np.float32)
for agent in self.agents
}
else:
assert isinstance(self.single_env.action_space, spaces.MultiDiscrete)
self.action_spaces = {
agent: self.single_env.action_space[agent]
for agent in self.agents
agent: self.single_env.action_space[agent] for agent in self.agents
}

def step(self, actions: Mapping[int, np.ndarray]) -> tuple[
dict[int, np.ndarray], dict[int, float], dict[int, bool],
dict[int, bool], dict[int, dict[str, Any]]]:
dict[int, np.ndarray],
dict[int, float],
dict[int, bool],
dict[int, bool],
dict[int, dict[str, Any]],
]:
"""
Returns: obss, rewards, terminateds, truncateds, infos
"""
@@ -99,8 +102,9 @@ def step(self, actions: Mapping[int, np.ndarray]) -> tuple[

return obss, rewards, terminateds, truncateds, infos

def reset(self, seed: int | None = None, options: dict | None = None
) -> tuple[dict[int, np.ndarray], dict[int, dict[str, Any]]]:
def reset(
self, seed: int | None = None, options: dict | None = None
) -> tuple[dict[int, np.ndarray], dict[int, dict[str, Any]]]:
"""Resets the environment."""
obs, info = self.single_env.reset(seed=seed, options=options)
self._state = obs
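Each building zone is exposed as its own agent, so observations and actions are exchanged as per-agent dicts; a minimal sketch (assuming integer agent ids, as the Mapping[int, np.ndarray] hints suggest, and a hypothetical parameter dict as above):

from sustaingym.envs.building import MultiAgentBuildingEnv, ParameterGenerator

ma_env = MultiAgentBuildingEnv(ParameterGenerator())   # hypothetical construction
obss, infos = ma_env.reset(seed=0)
actions = {agent: ma_env.action_spaces[agent].sample() for agent in ma_env.agents}
obss, rewards, terminateds, truncateds, infos = ma_env.step(actions)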
