Truncated is always False
chrisyeh96 committed Sep 20, 2023
1 parent 884b33f commit edb8d78
Showing 3 changed files with 11 additions and 23 deletions.
2 changes: 1 addition & 1 deletion docs/electricitymarketenv.md
@@ -12,7 +12,7 @@
$$
s(t) = (t, e, a(t-1), x_{t-1}, p_{t-1}, l_{t-1}, \hat{l}_{t:t+k-1}, m_{t-1}, \hat{m}_{t:t+k-1 \mid t}).
$$

- $t \in \{0, 1, \dotsc, 288\}$ is the current time step. $e \in \R_+$ is the agent's battery level (in MWh). $a(t-1) \in \R_+^{2 \times k}$ is the previous action. $x_{t-1} \in \R$ is the previous dispatch (in MWh) asked of the agent, and $p_{t-1} \in \R$ is the market clearing price from the previous step (in \$/MWh). $l_{t-1} \in \R$ is the previous demand experienced by the agent (in MWh), while $\hat{l}_{t:t+k-1} \in \R^k$ is the forecasted demand for the next $k$ steps. Likewise, $m_{t-1} \in \R$ is the previous MOER experienced by the agent (in kg CO<sub>2</sub> / MWh), while $\hat{m}_{t:t+k-1 \mid t} \in \R^k$ are the forecasted MOER values for the next $$k$$ steps.
+ $t \in \{0, 1, \dotsc, 288\}$ is the current time step. $e \in \R_+$ is the agent's battery level (in MWh). $a(t-1) \in \R_+^{2 \times k}$ is the previous action. $x_{t-1} \in \R$ is the previous dispatch (in MWh) asked of the agent, and $p_{t-1} \in \R$ is the market clearing price from the previous step (in \$/MWh). $l_{t-1} \in \R$ is the previous demand experienced by the agent (in MWh), while $\hat{l}_{t:t+k-1} \in \R^k$ is the forecasted demand for the next $k$ steps. Likewise, $m_{t-1} \in \R$ is the previous MOER experienced by the agent (in kg CO<sub>2</sub> / MWh), while $\hat{m}_{t:t+k-1 \mid t} \in \R^k$ are the forecasted MOER values for the next $k$ steps.

## Action Space
Each agent action is a bid $a(t) = (a^c, a^d) \in \R_+^k \times \R_+^k$, representing prices (in \$/MWh) that the agent is willing to pay (or receive) for charging (or discharging) per MWh of energy, for the next $k+1$ time steps starting at time step $t$. The generators are assumed to always bid their fixed true cost of generation. The environment solves the optimal dispatch problem to determine the electricity price $p_t$ (in \$/MWh) and the agent's dispatch $x_t \in \R$, which is the amount of energy (in MWh) that the agent is obligated to sell into or buy from the grid within the next time step. The dispatch in turn determines the storage system's next energy level. We also provide a wrapper that discretizes the action space into just three actions: charge, do nothing, or discharge.
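As a quick illustration of the state and bid described above, the sketch below declares analogous Gymnasium spaces. It is illustrative only: the key names, the horizon `k`, and the bounds are assumptions, not taken from SustainGym's actual implementation.

```python
import numpy as np
from gymnasium import spaces

k = 12  # assumed forecast horizon; the real value comes from the environment's settings

# Observation: one entry per component of the state s(t) described above.
observation_space = spaces.Dict({
    'time':            spaces.Box(0.0, 288.0, shape=(1,), dtype=np.float32),       # t
    'energy':          spaces.Box(0.0, np.inf, shape=(1,), dtype=np.float32),       # e (MWh)
    'prev_action':     spaces.Box(0.0, np.inf, shape=(2, k), dtype=np.float32),     # a(t-1)
    'prev_dispatch':   spaces.Box(-np.inf, np.inf, shape=(1,), dtype=np.float32),   # x_{t-1} (MWh)
    'prev_price':      spaces.Box(-np.inf, np.inf, shape=(1,), dtype=np.float32),   # p_{t-1} ($/MWh)
    'prev_demand':     spaces.Box(-np.inf, np.inf, shape=(1,), dtype=np.float32),   # l_{t-1} (MWh)
    'demand_forecast': spaces.Box(-np.inf, np.inf, shape=(k,), dtype=np.float32),   # demand forecast
    'prev_moer':       spaces.Box(0.0, np.inf, shape=(1,), dtype=np.float32),       # m_{t-1}
    'moer_forecast':   spaces.Box(0.0, np.inf, shape=(k,), dtype=np.float32),       # MOER forecast
})

# Action: nonnegative charge/discharge bid prices ($/MWh) over the horizon.
action_space = spaces.Box(0.0, np.inf, shape=(2, k), dtype=np.float32)
```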
25 changes: 8 additions & 17 deletions sustaingym/envs/cogen/env.py
Expand Up @@ -164,13 +164,13 @@ def _get_obs(self) -> dict[str, Any]:
"""Get the current observation.
The following values must be updated before calling self._get_obs():
- - self.current_timestep
+ - self.t
- self.current_day
- self.current_action
"""
- forecast_df = self._forecast_from_time(self.current_day, self.current_timestep)
+ forecast_df = self._forecast_from_time(self.current_day, self.t)
obs = {
- 'Time': np.array([self.current_timestep / self.timesteps_per_day], dtype=np.float32),
+ 'Time': np.array([self.t / self.timesteps_per_day], dtype=np.float32),
'Prev_Action': self.current_action,
'TAMB': forecast_df['Ambient Temperature'].values,
'PAMB': forecast_df['Ambient Pressure'].values,
@@ -213,8 +213,7 @@ def reset(self, seed: int | None = None, options: dict | None = None
else:
self.current_day = seed % self.n_days

- self.current_timestep = 0  # keeps track of which timestep we are on
- self.current_terminated = False
+ self.t = 0  # keeps track of which timestep we are on

# initial action is drawn randomly from the action space
# not sure if this is reasonable, TODO: check this
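The seeding branch above maps a reset seed onto a day of historical data. A minimal usage sketch, assuming the environment is exposed as `CogenEnv` under `sustaingym.envs.cogen` (the class name, import path, and constructor call are assumptions):

```python
from sustaingym.envs.cogen import CogenEnv  # assumed import path and class name

env = CogenEnv()                           # constructor arguments, if any, omitted
obs_a, _ = env.reset(seed=3)               # selects day 3 % env.n_days
obs_b, _ = env.reset(seed=3 + env.n_days)  # wraps around to the same day
```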
@@ -351,14 +350,6 @@ def _compute_reward(self, obs: dict[str, Any], action: dict[str, Any]
}
return total_reward, reward_breakdown

- def _terminated(self) -> bool:
-     """Determines if the episode is terminated or not.
-     Returns:
-         terminated: True if the episode is terminated, False otherwise
-     """
-     return self.current_timestep > self.timesteps_per_day - 1

def step(self, action: dict[str, Any]) -> tuple[dict[str, Any], float, bool, bool, dict[str, Any]]:
"""Run one timestep of the Cogen environment's dynamics.
@@ -369,7 +360,7 @@ def step(self, action: dict[str, Any]) -> tuple[dict[str, Any], float, bool, boo
obs: new state
reward: reward
terminated: termination flag
- truncated: always ``False``
+ truncated: always ``False``, since there is no intermediate stopping condition
info: info dict
"""
# compute the loss of taking the action
@@ -379,18 +370,18 @@ def step(self, action: dict[str, Any]) -> tuple[dict[str, Any], float, bool, boo
self.current_action = action

# update the current timestep
- self.current_timestep += 1
+ self.t += 1

# update the current observation
self.obs = self._get_obs()

# update the current done
- self.current_terminated = self._terminated()
+ terminated = (self.t >= self.timesteps_per_day)

# always False due to no intermediate stopping conditions
truncated = False

- return self.obs, self.current_reward, self.current_terminated, truncated, self.current_info
+ return self.obs, self.current_reward, terminated, truncated, self.current_info

def close(self):
return
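Since the five-element return follows the Gymnasium step API, a standard rollout runs until `terminated` becomes true; `truncated` never does on its own. A minimal sketch under the same assumed class name and import path as above:

```python
from sustaingym.envs.cogen import CogenEnv  # assumed import path and class name

env = CogenEnv()                          # constructor arguments, if any, omitted
obs, info = env.reset(seed=0)
terminated = truncated = False
while not (terminated or truncated):      # truncated stays False for this env
    action = env.action_space.sample()    # random policy, purely for illustration
    obs, reward, terminated, truncated, info = env.step(action)
env.close()
```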
7 changes: 2 additions & 5 deletions sustaingym/envs/evcharging/env.py
@@ -262,9 +262,7 @@ def step(self, action: np.ndarray
reward: scheduler's performance metric per timestep
terminated: whether episode is terminated
- truncated: whether episode has reached a time limit. Here, truncated
- is always the same as terminated because the episode is always
- across the entire day.
+ truncated: always ``False``, since there is no intermediate stopping condition
info: auxiliary useful information
- 'num_evs': int, number of charging sessions in episode.
@@ -298,8 +296,7 @@ def step(self, action: np.ndarray
reward = self._get_reward(schedule)
info = self._get_info()

- # terminated, truncated at end of day
- return observation, reward, done, done, info
+ return observation, reward, done, False, info

def reset(self, *, seed: int | None = None, options: dict | None = None
) -> tuple[dict[str, Any], dict[str, Any]]:
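Both environments now report `truncated = False` unconditionally. If a shorter episode budget is ever needed, Gymnasium's `TimeLimit` wrapper can supply the truncation signal externally. The sketch below is an assumption-laden illustration: the `EVChargingEnv` name, import path, and omitted constructor arguments are not taken from this diff.

```python
from gymnasium.wrappers import TimeLimit
from sustaingym.envs.evcharging import EVChargingEnv  # assumed import path and class name

# Wrap the env so truncated flips to True after 100 steps, even though the
# underlying env never truncates on its own.
env = TimeLimit(EVChargingEnv(), max_episode_steps=100)  # constructor arguments omitted
obs, info = env.reset(seed=0)
done = False
while not done:
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    done = terminated or truncated
```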
