diff --git a/docs/electricitymarketenv.md b/docs/electricitymarketenv.md
index 1cc57ec..06975a7 100644
--- a/docs/electricitymarketenv.md
+++ b/docs/electricitymarketenv.md
@@ -12,7 +12,7 @@ $$
s(t) = (t, e, a(t-1), x_{t-1}, p_{t-1}, l_{t-1}, \hat{l}_{t:t+k-1}, m_{t-1}, \hat{m}_{t:t+k-1 \mid t}).
$$
-$t \in \{0, 1, \dotsc, 288\}$ is the current time step. $e \in \R_+$ is the agent's battery level (in MWh). $a(t-1) \in \R_+^{2 \times k}$ is the previous action. $x_{t-1} \in \R$ is the previous dispatch (in MWh) asked of the agent, and $p_{t-1} \in \R$ is market clearing price from the previous step (in \$/MWh). $l_{t-1} \in \R$ is the previous demand experienced by the agent (in MWh), while $\hat{l}_{t:t+k \mid t} \in \R^k$ is the forecasted demand for the next $k$ steps. Likewise, $m_{t-1} \in \R$ is the previous MOER experienced by the agent (in kg CO2 / MWh), while $\hat{l}_{t:t+k \mid t} \in \R^k$ is the forecasted MOER values for the next $$k$$ steps.
+$t \in \{0, 1, \dotsc, 288\}$ is the current time step. $e \in \R_+$ is the agent's battery level (in MWh). $a(t-1) \in \R_+^{2 \times k}$ is the previous action. $x_{t-1} \in \R$ is the previous dispatch (in MWh) asked of the agent, and $p_{t-1} \in \R$ is the market clearing price from the previous step (in \$/MWh). $l_{t-1} \in \R$ is the previous demand experienced by the agent (in MWh), while $\hat{l}_{t:t+k-1} \in \R^k$ is the forecasted demand for the next $k$ steps. Likewise, $m_{t-1} \in \R$ is the previous MOER experienced by the agent (in kg CO2 / MWh), while $\hat{m}_{t:t+k-1 \mid t} \in \R^k$ is the vector of forecasted MOER values for the next $k$ steps.
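
For concreteness, here is a minimal sketch of how this observation might be laid out as a `gymnasium` `Dict` space. The key names, the horizon value `k`, and the bounds below are illustrative assumptions, not the environment's actual definitions.

```python
# Illustrative sketch only: key names, k, and bounds are assumptions,
# not ElectricityMarketEnv's actual observation space.
import numpy as np
from gymnasium import spaces

k = 12  # assumed forecast horizon

observation_space = spaces.Dict({
    'time':            spaces.Discrete(289),                                       # t in {0, ..., 288}
    'energy':          spaces.Box(0.0, np.inf, shape=(1,), dtype=np.float32),      # battery level e (MWh)
    'previous_action': spaces.Box(0.0, np.inf, shape=(2, k), dtype=np.float32),    # a(t-1)
    'dispatch':        spaces.Box(-np.inf, np.inf, shape=(1,), dtype=np.float32),  # x_{t-1} (MWh)
    'price':           spaces.Box(-np.inf, np.inf, shape=(1,), dtype=np.float32),  # p_{t-1} ($/MWh)
    'demand':          spaces.Box(-np.inf, np.inf, shape=(1,), dtype=np.float32),  # l_{t-1} (MWh)
    'demand_forecast': spaces.Box(-np.inf, np.inf, shape=(k,), dtype=np.float32),  # next-k demand forecast
    'moer':            spaces.Box(0.0, np.inf, shape=(1,), dtype=np.float32),      # m_{t-1} (kg CO2 / MWh)
    'moer_forecast':   spaces.Box(0.0, np.inf, shape=(k,), dtype=np.float32),      # next-k MOER forecast
})
```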
## Action Space
Each agent action is a bid $a(t) = (a^c, a^d) \in \R_+^k \times \R_+^k$, representing the prices (in \$/MWh) that the agent is willing to pay for charging (or to receive for discharging), for the next $k$ time steps starting at time step $t$. The generators are assumed to always bid their fixed true cost of generation. The environment solves the optimal dispatch problem to determine the electricity price $p_t$ (in \$/MWh) and the agent's dispatch $x_t \in \R$, the amount of energy (in MWh) that the agent is obligated to sell into or buy from the grid within the next time step. The dispatch in turn determines the storage system's next energy level. We also provide a wrapper that discretizes the action space into just three actions: charge, do nothing, or discharge.
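
Such a discretizing wrapper could look roughly like the following. The class name, the bid magnitudes, and the assumption of a `(2, k)` `Box` action space are ours for illustration, not the library's actual implementation.

```python
# Hypothetical sketch of the 3-action (charge / do nothing / discharge) wrapper;
# the real SustainGym wrapper's name and bid values may differ.
import numpy as np
import gymnasium as gym
from gymnasium import spaces


class DiscretizeActionsWrapper(gym.ActionWrapper):
    """Maps {0: charge, 1: do nothing, 2: discharge} to continuous bids."""

    def __init__(self, env: gym.Env, high_price: float = 1000.0):
        super().__init__(env)
        self.k = env.action_space.shape[-1]  # assumes a (2, k) Box action space
        self.high_price = high_price
        self.action_space = spaces.Discrete(3)

    def action(self, act: int) -> np.ndarray:
        charge_bid = np.zeros(self.k, dtype=np.float32)                     # pay nothing to charge
        discharge_bid = np.full(self.k, self.high_price, dtype=np.float32)  # ask a prohibitive price
        if act == 0:    # charge: offer to pay a very high price, so the bid clears
            charge_bid[:] = self.high_price
        elif act == 2:  # discharge: offer energy at zero price, so the bid clears
            discharge_bid[:] = 0.0
        # act == 1 ("do nothing"): the default bids should leave the agent undispatched
        return np.stack([charge_bid, discharge_bid])
```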
diff --git a/sustaingym/envs/cogen/env.py b/sustaingym/envs/cogen/env.py
index c47a2e4..ac0d102 100644
--- a/sustaingym/envs/cogen/env.py
+++ b/sustaingym/envs/cogen/env.py
@@ -164,13 +164,13 @@ def _get_obs(self) -> dict[str, Any]:
"""Get the current observation.
The following values must be updated before calling self._get_obs():
- - self.current_timestep
+ - self.t
- self.current_day
- self.current_action
"""
- forecast_df = self._forecast_from_time(self.current_day, self.current_timestep)
+ forecast_df = self._forecast_from_time(self.current_day, self.t)
obs = {
- 'Time': np.array([self.current_timestep / self.timesteps_per_day], dtype=np.float32),
+ 'Time': np.array([self.t / self.timesteps_per_day], dtype=np.float32),
'Prev_Action': self.current_action,
'TAMB': forecast_df['Ambient Temperature'].values,
'PAMB': forecast_df['Ambient Pressure'].values,
@@ -213,8 +213,7 @@ def reset(self, seed: int | None = None, options: dict | None = None
else:
self.current_day = seed % self.n_days
- self.current_timestep = 0 # keeps track of which timestep we are on
- self.current_terminated = False
+ self.t = 0 # keeps track of which timestep we are on
# initial action is drawn randomly from the action space
# not sure if this is reasonable, TODO: check this
@@ -351,14 +350,6 @@ def _compute_reward(self, obs: dict[str, Any], action: dict[str, Any]
}
return total_reward, reward_breakdown
- def _terminated(self) -> bool:
- """Determines if the episode is terminated or not.
-
- Returns:
- terminated: True if the episode is terminated, False otherwise
- """
- return self.current_timestep > self.timesteps_per_day - 1
-
def step(self, action: dict[str, Any]) -> tuple[dict[str, Any], float, bool, bool, dict[str, Any]]:
"""Run one timestep of the Cogen environment's dynamics.
@@ -369,7 +360,7 @@ def step(self, action: dict[str, Any]) -> tuple[dict[str, Any], float, bool, boo
obs: new state
reward: reward
terminated: termination flag
- truncated: always ``False``
+ truncated: always ``False``, since there is no intermediate stopping condition
info: info dict
"""
# compute the loss of taking the action
@@ -379,18 +370,18 @@ def step(self, action: dict[str, Any]) -> tuple[dict[str, Any], float, bool, boo
self.current_action = action
# update the current timestep
- self.current_timestep += 1
+ self.t += 1
# update the current observation
self.obs = self._get_obs()
- # update the current done
- self.current_terminated = self._terminated()
+ # the episode ends once a full day has elapsed
+ terminated = (self.t >= self.timesteps_per_day)
# always False due to no intermediate stopping conditions
truncated = False
- return self.obs, self.current_reward, self.current_terminated, truncated, self.current_info
+ return self.obs, self.current_reward, terminated, truncated, self.current_info
def close(self):
return
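
A brief usage sketch of the revised termination handling: the episode runs for one full day and ends when `terminated` is set, while `truncated` stays `False`. The `CogenEnv` import path and default constructor shown here are assumptions.

```python
# Usage sketch: one full-day rollout, ending when `terminated` is set.
# The CogenEnv import path and constructor defaults are assumed here.
from sustaingym.envs.cogen import CogenEnv

env = CogenEnv()
obs, info = env.reset(seed=0)
terminated = truncated = False
episode_return = 0.0
while not (terminated or truncated):
    action = env.action_space.sample()  # replace with a trained policy
    obs, reward, terminated, truncated, info = env.step(action)
    episode_return += reward
env.close()
```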
diff --git a/sustaingym/envs/evcharging/env.py b/sustaingym/envs/evcharging/env.py
index 85bac2d..73fefa0 100755
--- a/sustaingym/envs/evcharging/env.py
+++ b/sustaingym/envs/evcharging/env.py
@@ -262,9 +262,7 @@ def step(self, action: np.ndarray
reward: scheduler's performance metric per timestep
terminated: whether episode is terminated
- truncated: whether episode has reached a time limit. Here, truncated
- is always the same as terminated because the episode is always
- across the entire day.
+ truncated: always ``False``, since there is no intermediate stopping condition
info: auxiliary useful information
- 'num_evs': int, number of charging sessions in episode.
@@ -298,8 +296,7 @@ def step(self, action: np.ndarray
reward = self._get_reward(schedule)
info = self._get_info()
- # terminated, truncated at end of day
- return observation, reward, done, done, info
+ return observation, reward, done, False, info
def reset(self, *, seed: int | None = None, options: dict | None = None
) -> tuple[dict[str, Any], dict[str, Any]]:
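
The distinction matters because, under the Gymnasium five-tuple API, agents bootstrap their value targets from the next state on truncation but not on true termination; since the EV-charging episode genuinely ends with the day, `terminated` alone should carry that signal. A self-contained sketch of the distinction:

```python
def td_target(reward: float, next_value: float, terminated: bool,
              gamma: float = 0.99) -> float:
    """One-step TD target under the Gymnasium 5-tuple convention.

    A transition that is merely truncated still bootstraps from the
    next state; only a truly terminated transition drops the bootstrap.
    """
    return reward + (0.0 if terminated else gamma * next_value)


# terminal vs. non-terminal transition with the same reward and next-state value
print(td_target(1.0, next_value=5.0, terminated=True))   # 1.0
print(td_target(1.0, next_value=5.0, terminated=False))  # 5.95
```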