
Commit 820a5b9: Merge branch 'ewilk0-pr'
chrisyeh96 committed May 13, 2024
2 parents 23ab5e5 + 7346660
Showing 6 changed files with 559 additions and 153 deletions.
6 changes: 1 addition & 5 deletions sustaingym/envs/building/__init__.py
@@ -2,8 +2,4 @@
from .multiagent_env import MultiAgentBuildingEnv
from .utils import ParameterGenerator

__all__ = [
'BuildingEnv',
'MultiAgentBuildingEnv',
'ParameterGenerator'
]
__all__ = ["BuildingEnv", "MultiAgentBuildingEnv", "ParameterGenerator"]
140 changes: 85 additions & 55 deletions sustaingym/envs/building/env.py
100644 → 100755
@@ -1,6 +1,7 @@
"""
The module implements the BuildingEnv class.
"""

from __future__ import annotations

from typing import Any
@@ -80,6 +81,7 @@ class BuildingEnv(gym.Env):
timestep: current timestep in episode, from 0 to 288
action_space: structure of actions expected by environment
"""

# Occupancy nonlinear coefficients, collected from page 1299 of
# https://energyplus.net/assets/nrel_custom/pdfs/pdfs_v23.1.0/EngineeringReference.pdf
OCCU_COEF = [
@@ -120,20 +122,21 @@ def __init__(self, parameters: dict[str, Any]):
"""
self.parameters = parameters

self.n = parameters['n']
self.zones = parameters['zones']
self.target = parameters['target']
self.out_temp = parameters['out_temp']
self.ground_temp = parameters['ground_temp']
self.ghi = parameters['ghi']
self.metabolism = parameters['metabolism']
self.ac_map = parameters['ac_map']
self.maxpower = parameters['max_power']
self.temp_range = parameters['temp_range']
self.reward_pnorm = parameters['reward_pnorm']
self.is_continuous_action = parameters['is_continuous_action']
self.timestep = parameters['time_resolution']
self.Occupower = 0
self.n = parameters["n"]
self.zones = parameters["zones"]
self.target = parameters["target"]
self.out_temp = parameters["out_temp"]
self.ground_temp = parameters["ground_temp"]
self.ghi = parameters["ghi"]
self.metabolism = parameters["metabolism"]
self.ac_map = parameters["ac_map"]
self.maxpower = parameters["max_power"]
self.temp_range = parameters["temp_range"]
self.reward_pnorm = parameters["reward_pnorm"]
self.is_continuous_action = parameters["is_continuous_action"]
self.timestep = parameters["time_resolution"]
self.episode_len = parameters["episode_len"]
self.Occupower = 0.
self.datadriven = False
self.length_of_weather = len(self.out_temp)

@@ -146,25 +149,30 @@ def __init__(self, parameters: dict[str, Any]):
self.action_space = gym.spaces.Box(self.Qlow, self.Qhigh, dtype=np.float32)
else:
self.action_space = gym.spaces.MultiDiscrete(
(self.Qhigh * self.DISCRETE_LENGTH
- self.Qlow * self.DISCRETE_LENGTH).astype(np.int64)
(
self.Qhigh * self.DISCRETE_LENGTH - self.Qlow * self.DISCRETE_LENGTH
).astype(np.int64)
)

# Set the observation space bounds based on the minimum and maximum temperature
min_T, max_T = self.temp_range
heat_max = 1000
self.low = np.concatenate([
np.ones(self.n + 1) * min_T, # temp of zones and outdoor
[0], # GHI
[min_T], # temp of ground
[-min_T * self.OCCU_COEF_LINEAR / 1000] # occupancy power
]).astype(np.float32)
self.high = np.concatenate([
np.ones(self.n + 1) * max_T, # temp of zones and outdoor
[heat_max], # GHI
[max_T], # temp of ground
[heat_max] # occupancy power
]).astype(np.float32)
self.low = np.concatenate(
[
np.ones(self.n + 1) * min_T, # temp of zones and outdoor
[0], # GHI
[min_T], # temp of ground
[-min_T * self.OCCU_COEF_LINEAR / 1000], # occupancy power
]
).astype(np.float32)
self.high = np.concatenate(
[
np.ones(self.n + 1) * max_T, # temp of zones and outdoor
[heat_max], # GHI
[max_T], # temp of ground
[heat_max], # occupancy power
]
).astype(np.float32)
self.observation_space = gym.spaces.Box(self.low, self.high, dtype=np.float32)

# Set the weight for the power consumption and comfort range
@@ -184,17 +192,18 @@ def __init__(self, parameters: dict[str, Any]):
self.X_new = self.target

# Stack B and D matrix together for easy calculation
A = parameters['A']
B = parameters['B']
D = parameters['D']
A = parameters["A"]
B = parameters["B"]
D = parameters["D"]
BD = np.hstack((D[:, np.newaxis], B))

# Compute the discrete-time system matrices
self.A_d = expm(A * self.timestep)
self.BD_d = LA.inv(A) @ (self.A_d - np.eye(self.A_d.shape[0])) @ BD

def step(self, action: np.ndarray
) -> tuple[np.ndarray, float, bool, bool, dict[str, Any]]:
def step(
self, action: np.ndarray
) -> tuple[np.ndarray, float, bool, bool, dict[str, Any]]:
"""Steps the environment.
Updates the state of the environment based on the given action and calculates the
@@ -231,12 +240,12 @@ def step(self, action: np.ndarray
done = False

# Prepare the input matrices X and Y
X = self.state[:self.n].T
X = self.state[: self.n].T
Y = np.insert(
np.append(action, self.ghi[self.epoch]), 0, self.out_temp[self.epoch]
).T
Y = np.insert(Y, 0, self.ground_temp[self.epoch]).T
avg_temp = np.sum(self.state[:self.n]) / self.n
avg_temp = np.sum(self.state[: self.n]) / self.n
meta = self.metabolism[self.epoch]

# If the environment is data-driven, add additional features to the Y matrix
@@ -273,13 +282,17 @@ def step(self, action: np.ndarray
info = self._get_info()

# self.statelist.append(self.state)
self.state = np.concatenate([
X_new,
[self.out_temp[self.epoch],
self.ground_temp[self.epoch],
self.ghi[self.epoch],
self.Occupower / 1000],
]).astype(np.float32)
self.state = np.concatenate(
[
X_new,
[
self.out_temp[self.epoch],
self.ground_temp[self.epoch],
self.ghi[self.epoch],
self.Occupower / 1000,
],
]
).astype(np.float32)

# Store the action in the actionlist
self.actionlist.append(action * self.maxpower)
@@ -288,24 +301,32 @@ def step(self, action: np.ndarray
self.epoch += 1

# Check if the environment has reached the end of the weather data
if (
self.epoch % (self.length_of_weather // self.episode_len) == 0
and self.epoch != 0
):
done = True
if self.epoch >= self.length_of_weather - 1:
done = True
self.epoch = 0

# Return the new state, reward, done flag, and info
return self.state, reward, done, done, info
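With the episode handling above, a rollout now terminates after episode_len steps rather than only at the end of the weather data. A minimal interaction loop (hypothetical setup; the actual ParameterGenerator arguments may differ):

from sustaingym.envs.building import BuildingEnv, ParameterGenerator

params = ParameterGenerator()   # hypothetical call, for illustration only
env = BuildingEnv(params)

obs, info = env.reset(seed=0)
terminated = truncated = False
total_reward = 0.0
while not (terminated or truncated):
    action = env.action_space.sample()   # random controller, for illustration only
    obs, reward, terminated, truncated, info = env.step(action)
    total_reward += reward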

def reset(self, *, seed: int | None = None, options: dict | None = None
) -> tuple[np.ndarray, dict[str, Any]]:
def reset(
self, *, seed: int | None = None, options: dict | None = None
) -> tuple[np.ndarray, dict[str, Any]]:
"""Resets the environment.
Prepares the environment for the next episode by setting the initial
temperatures, average temperature, occupancy, and occupower. The initial state
is constructed by concatenating these variables.
Args:
seed: seed for resetting the environment. An episode is entirely
reproducible no matter the generator used.
seed: seed for resetting the environment. The seed determines which episode
to start at. Increment the seed sequentially to experience episodes
in chronological order. Set seed to None for a random episode.
An episode is entirely reproducible no matter the generator used.
options: optional resetting options
- 'T_initial': np.ndarray, shape [n], initial temperature of each zone
@@ -317,7 +338,12 @@ def reset(self, *, seed: int | None = None, options: dict | None = None
super().reset(seed=seed, options=options)

# Initialize the episode counter
self.epoch = 0
num_episodes = self.length_of_weather // self.episode_len
if seed is None:
episode = self.np_random.integers(low=0, high=num_episodes)
else:
episode = seed % num_episodes
self.epoch = episode * self.episode_len
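The seed now selects which slice of the weather data the episode covers: episode = seed % num_episodes and epoch = episode * episode_len, so consecutive seeds visit episodes in chronological order, and seed=None picks one at random. A sketch of the resulting behavior (hypothetical numbers, continuing the setup above):

from sustaingym.envs.building import BuildingEnv, ParameterGenerator

env = BuildingEnv(ParameterGenerator())   # hypothetical construction, as above
# Suppose length_of_weather = 105120 five-minute steps and episode_len = 288 (one day),
# so num_episodes = 105120 // 288 = 365. Then:
obs_day1, _ = env.reset(seed=0)    # epoch 0   (first day of the weather data)
obs_day2, _ = env.reset(seed=1)    # epoch 288 (second day)
obs_wrap, _ = env.reset(seed=365)  # 365 % 365 == 0, so epoch 0 again
obs_rand, _ = env.reset()          # seed=None -> a uniformly random day via self.np_random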

# Initialize state and action lists
self.statelist = []
@@ -339,13 +365,17 @@ def reset(self, *, seed: int | None = None, options: dict | None = None

# Construct the initial state by concatenating relevant variables
self.X_new = T_initial
self.state = np.concatenate([
T_initial,
[self.out_temp[self.epoch],
self.ground_temp[self.epoch],
self.ghi[self.epoch],
self.Occupower / 1000]
]).astype(np.float32)
self.state = np.concatenate(
[
T_initial,
[
self.out_temp[self.epoch],
self.ground_temp[self.epoch],
self.ghi[self.epoch],
self.Occupower / 1000,
],
]
).astype(np.float32)

# Initialize the rewards
self.flag = 1
@@ -454,7 +484,7 @@ def train(self, states: np.ndarray, actions: np.ndarray) -> None:

# Update the A_d and B_d matrices with the coefficients from the fitted model
self.A_d = beta[:, : self.n]
self.BD_d = beta[:, self.n:]
self.BD_d = beta[:, self.n :]

# Set the data-driven flag to True
self.datadriven = True
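The train() change above replaces the physics-derived A_d and BD_d with coefficients of a model fitted to logged states and actions. The exact feature construction is not shown in this diff, but a least-squares fit of that general form might look like the following sketch (illustrative only, not the repository's implementation):

import numpy as np

def fit_linear_dynamics(X: np.ndarray, U: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """Fit X[t+1] ~= A_d @ X[t] + BD_d @ U[t] by ordinary least squares.

    X: shape [T, n] zone temperatures; U: shape [T, m] exogenous inputs and actions.
    """
    features = np.hstack([X[:-1], U[:-1]])   # [T-1, n+m]
    targets = X[1:]                          # [T-1, n]
    beta, *_ = np.linalg.lstsq(features, targets, rcond=None)
    beta = beta.T                            # [n, n+m], rows map features to the next state
    n = X.shape[1]
    return beta[:, :n], beta[:, n:]          # A_d, BD_d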
22 changes: 13 additions & 9 deletions sustaingym/envs/building/multiagent_env.py
@@ -1,6 +1,7 @@
"""
The module implements a multi-agent version of the building environment.
"""

from __future__ import annotations

from collections.abc import Mapping
@@ -54,25 +55,27 @@ def __init__(self, parameters: dict[str, Any]) -> None:
self.agents = self.possible_agents[:]

self.observation_spaces = {
agent: self.single_env.observation_space
for agent in self.agents
agent: self.single_env.observation_space for agent in self.agents
}

if self.single_env.is_continuous_action:
self.action_spaces = {
agent: spaces.Box(-1., 1., shape=(1,), dtype=np.float32)
agent: spaces.Box(-1.0, 1.0, shape=(1,), dtype=np.float32)
for agent in self.agents
}
else:
assert isinstance(self.single_env.action_space, spaces.MultiDiscrete)
self.action_spaces = {
agent: self.single_env.action_space[agent]
for agent in self.agents
agent: self.single_env.action_space[agent] for agent in self.agents
}

def step(self, actions: Mapping[int, np.ndarray]) -> tuple[
dict[int, np.ndarray], dict[int, float], dict[int, bool],
dict[int, bool], dict[int, dict[str, Any]]]:
dict[int, np.ndarray],
dict[int, float],
dict[int, bool],
dict[int, bool],
dict[int, dict[str, Any]],
]:
"""
Returns: obss, rewards, terminateds, truncateds, infos
"""
@@ -99,8 +102,9 @@ def step(self, actions: Mapping[int, np.ndarray]) -> tuple[

return obss, rewards, terminateds, truncateds, infos

def reset(self, seed: int | None = None, options: dict | None = None
) -> tuple[dict[int, np.ndarray], dict[int, dict[str, Any]]]:
def reset(
self, seed: int | None = None, options: dict | None = None
) -> tuple[dict[int, np.ndarray], dict[int, dict[str, Any]]]:
"""Resets the environment."""
obs, info = self.single_env.reset(seed=seed, options=options)
self._state = obs
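Each building zone is exposed as its own agent, so observations and actions are exchanged as per-agent dicts; a minimal sketch (assuming integer agent ids, as the Mapping[int, np.ndarray] hints suggest, and a hypothetical parameter dict as above):

from sustaingym.envs.building import MultiAgentBuildingEnv, ParameterGenerator

ma_env = MultiAgentBuildingEnv(ParameterGenerator())   # hypothetical construction
obss, infos = ma_env.reset(seed=0)
actions = {agent: ma_env.action_spaces[agent].sample() for agent in ma_env.agents}
obss, rewards, terminateds, truncateds, infos = ma_env.step(actions)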
