diff --git a/docs/electricitymarketenv.md b/docs/electricitymarketenv.md
index 1cc57ec..06975a7 100644
--- a/docs/electricitymarketenv.md
+++ b/docs/electricitymarketenv.md
@@ -12,7 +12,7 @@
 $$
 s(t) = (t, e, a(t-1), x_{t-1}, p_{t-1}, l_{t-1}, \hat{l}_{t:t+k-1}, m_{t-1}, \hat{m}_{t:t+k-1 \mid t}).
 $$
-$t \in \{0, 1, \dotsc, 288\}$ is the current time step. $e \in \R_+$ is the agent's battery level (in MWh). $a(t-1) \in \R_+^{2 \times k}$ is the previous action. $x_{t-1} \in \R$ is the previous dispatch (in MWh) asked of the agent, and $p_{t-1} \in \R$ is market clearing price from the previous step (in \$/MWh). $l_{t-1} \in \R$ is the previous demand experienced by the agent (in MWh), while $\hat{l}_{t:t+k \mid t} \in \R^k$ is the forecasted demand for the next $k$ steps. Likewise, $m_{t-1} \in \R$ is the previous MOER experienced by the agent (in kg CO2 / MWh), while $\hat{l}_{t:t+k \mid t} \in \R^k$ is the forecasted MOER values for the next $$k$$ steps.
+$t \in \{0, 1, \dotsc, 288\}$ is the current time step. $e \in \R_+$ is the agent's battery level (in MWh). $a(t-1) \in \R_+^{2 \times k}$ is the previous action. $x_{t-1} \in \R$ is the previous dispatch (in MWh) asked of the agent, and $p_{t-1} \in \R$ is the market clearing price from the previous step (in \$/MWh). $l_{t-1} \in \R$ is the previous demand experienced by the agent (in MWh), while $\hat{l}_{t:t+k-1} \in \R^k$ is the forecasted demand for the next $k$ steps. Likewise, $m_{t-1} \in \R$ is the previous MOER experienced by the agent (in kg CO2 / MWh), while $\hat{m}_{t:t+k-1 \mid t} \in \R^k$ gives the forecasted MOER values for the next $k$ steps.
 
 ## Action Space
 Each agent action is a bid $a(t) = (a^c, a^d) \in \R_+^k \times \R_+^k$, representing prices (in \$/MWh) that the agent is willing to pay (or receive) for charging (or discharging) per MWh of energy, for the next $k+1$ time steps starting at time step $t$. The generators are assumed to always bid their fixed true cost of generation. The environment solves the optimal dispatch problem to determine the electricity price $p_t$ (in \$/MWh) and the agent's dispatch $x_t \in \R$, which is the amount of energy (in MWh) that the agent is obligated to sell into or buy from the grid within the next time step. The dispatch in turn determines the storage system's next energy level. We also provide a wrapper that discretizes the action space into 3 actions only: charge, do nothing, or discharge.
diff --git a/sustaingym/envs/cogen/env.py b/sustaingym/envs/cogen/env.py
index c47a2e4..ac0d102 100644
--- a/sustaingym/envs/cogen/env.py
+++ b/sustaingym/envs/cogen/env.py
@@ -164,13 +164,13 @@ def _get_obs(self) -> dict[str, Any]:
         """Get the current observation.
 
         The following values must be updated before calling self._get_obs():
-        - self.current_timestep
+        - self.t
         - self.current_day
         - self.current_action
         """
-        forecast_df = self._forecast_from_time(self.current_day, self.current_timestep)
+        forecast_df = self._forecast_from_time(self.current_day, self.t)
         obs = {
-            'Time': np.array([self.current_timestep / self.timesteps_per_day], dtype=np.float32),
+            'Time': np.array([self.t / self.timesteps_per_day], dtype=np.float32),
             'Prev_Action': self.current_action,
             'TAMB': forecast_df['Ambient Temperature'].values,
             'PAMB': forecast_df['Ambient Pressure'].values,
@@ -213,8 +213,7 @@ def reset(self, seed: int | None = None, options: dict | None = None
         else:
             self.current_day = seed % self.n_days
 
-        self.current_timestep = 0  # keeps track of which timestep we are on
-        self.current_terminated = False
+        self.t = 0  # keeps track of which timestep we are on
 
         # initial action is drawn randomly from the action space
         # not sure if this is reasonable, TODO: check this
@@ -351,14 +350,6 @@ def _compute_reward(self, obs: dict[str, Any], action: dict[str, Any]
         }
         return total_reward, reward_breakdown
 
-    def _terminated(self) -> bool:
-        """Determines if the episode is terminated or not.
-
-        Returns:
-            terminated: True if the episode is terminated, False otherwise
-        """
-        return self.current_timestep > self.timesteps_per_day - 1
-
     def step(self, action: dict[str, Any]) -> tuple[dict[str, Any], float, bool, bool, dict[str, Any]]:
         """Run one timestep of the Cogen environment's dynamics.
 
@@ -369,7 +360,7 @@ def step(self, action: dict[str, Any]) -> tuple[dict[str, Any], float, bool, boo
             obs: new state
             reward: reward
             terminated: termination flag
-            truncated: always ``False``
+            truncated: always ``False``, since there is no intermediate stopping condition
             info: info dict
         """
         # compute the loss of taking the action
@@ -379,18 +370,18 @@ def step(self, action: dict[str, Any]) -> tuple[dict[str, Any], float, bool, boo
         self.current_action = action
 
         # update the current timestep
-        self.current_timestep += 1
+        self.t += 1
 
         # update the current observation
         self.obs = self._get_obs()
 
         # update the current done
-        self.current_terminated = self._terminated()
+        terminated = (self.t >= self.timesteps_per_day)
 
         # always False due to no intermediate stopping conditions
         truncated = False
 
-        return self.obs, self.current_reward, self.current_terminated, truncated, self.current_info
+        return self.obs, self.current_reward, terminated, truncated, self.current_info
 
     def close(self):
         return
diff --git a/sustaingym/envs/evcharging/env.py b/sustaingym/envs/evcharging/env.py
index 85bac2d..73fefa0 100755
--- a/sustaingym/envs/evcharging/env.py
+++ b/sustaingym/envs/evcharging/env.py
@@ -262,9 +262,7 @@ def step(self, action: np.ndarray
             reward: scheduler's performance metric per timestep
             terminated: whether episode is terminated
-            truncated: whether episode has reached a time limit. Here, truncated
-                is always the same as terminated because the episode is always
-                across the entire day.
+            truncated: always ``False``, since there is no intermediate stopping condition
             info: auxiliary useful information
                 - 'num_evs': int, number of charging sessions in episode.
@@ -298,8 +296,7 @@ def step(self, action: np.ndarray
         reward = self._get_reward(schedule)
         info = self._get_info()
 
-        # terminated, truncated at end of day
-        return observation, reward, done, done, info
+        return observation, reward, done, False, info
 
     def reset(self, *, seed: int | None = None, options: dict | None = None
               ) -> tuple[dict[str, Any], dict[str, Any]]:
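
For the observation and action spaces described in `docs/electricitymarketenv.md`, a minimal sketch of how a bid action $a(t) = (a^c, a^d)$ might be assembled and submitted through the standard Gymnasium `reset`/`step` interface. None of the code below is part of the diff above, and it is not SustainGym's documented API: the helper name, the constant-price bid, and the assumption that the environment accepts a `(2, k)` array are all illustrative.

```python
import gymnasium as gym
import numpy as np


def run_one_step(env: gym.Env, k: int, charge_price: float, discharge_price: float):
    """Submit a constant bid for the next k steps and return the Gymnasium 5-tuple.

    Assumes the environment accepts a (2, k) array of non-negative prices,
    mirroring the a(t-1) entry of the state; names here are illustrative.
    """
    obs, info = env.reset(seed=0)
    a_c = np.full(k, charge_price, dtype=np.float32)     # $/MWh the agent pays to charge
    a_d = np.full(k, discharge_price, dtype=np.float32)  # $/MWh the agent asks to discharge
    action = np.stack([a_c, a_d])                        # shape (2, k)
    return env.step(action)  # obs, reward, terminated, truncated, info
```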
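The inlined check `terminated = (self.t >= self.timesteps_per_day)` makes every Cogen episode exactly `timesteps_per_day` steps long, with `truncated` never set. A sketch of the rollout pattern this implies, assuming only the standard Gymnasium interface; the random policy and helper name are placeholders.

```python
import gymnasium as gym


def rollout_one_day(env: gym.Env, seed: int = 0) -> int:
    """Run one episode with random actions and return its length in steps."""
    obs, info = env.reset(seed=seed)
    terminated = truncated = False
    steps = 0
    while not (terminated or truncated):
        action = env.action_space.sample()  # placeholder policy
        obs, reward, terminated, truncated, info = env.step(action)
        steps += 1
    # The loop can only exit via `terminated`, after timesteps_per_day steps;
    # `truncated` stays False under the new logic.
    assert not truncated
    return steps
```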
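The EVChargingEnv change has the same effect on consumers of the 5-tuple: end of day now arrives as `terminated=True, truncated=False`, a genuine terminal state rather than a time-limit cut-off, so value-based learners should not bootstrap past it. A self-contained sketch of that bookkeeping; the `td_target` helper is illustrative and not part of SustainGym.

```python
def td_target(reward: float, next_value: float, gamma: float, terminated: bool) -> float:
    """One-step TD target that drops the bootstrap term at a true terminal state.

    Truncation (a time-limit cut-off) would still bootstrap, but EVChargingEnv
    never truncates, so `terminated` alone decides.
    """
    return reward + (0.0 if terminated else gamma * next_value)


# End-of-day transition: terminated=True, truncated=False -> no bootstrapping.
print(td_target(reward=1.5, next_value=10.0, gamma=0.99, terminated=True))   # 1.5
# Ordinary mid-day transition -> bootstrap as usual.
print(td_target(reward=1.5, next_value=10.0, gamma=0.99, terminated=False))  # 11.4
```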