Commit

Merge pull request #82 from UoA-CARES/dev/dmcs
Dev/dmcs
dvalenciar authored Oct 11, 2023
2 parents 0a48ba3 + 8618e92 commit 0b94ca4
Showing 10 changed files with 448 additions and 127 deletions.
44 changes: 38 additions & 6 deletions README.md
@@ -8,24 +8,56 @@ The CARES reinforcement learning bed used as the foundation for RL related projects
Consult the repository [wiki](https://github.com/UoA-CARES/cares_reinforcement_learning/wiki) for a guide on how to use the package

## Installation Instructions
If you want to utilise the GPU with Pytorch install CUDA first - https://developer.nvidia.com/cuda-toolkit

Install Pytorch following the instructions here - https://pytorch.org/get-started/locally/

`git clone` the repository into your desired directory on your local machine

Run `pip3 install -r requirements.txt` in the **root directory** of the package

To make the module **globally accessible** in your working environment run `pip3 install --editable .` in the **project root**
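
Putting the steps together, a typical install might look like the following (a sketch that assumes the repository URL of this project and a Python 3 environment):
```
git clone https://github.com/UoA-CARES/cares_reinforcement_learning.git
cd cares_reinforcement_learning
pip3 install -r requirements.txt
pip3 install --editable .
```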

## Running an Example
This repository includes a script that allows you to run any OpenAI Gymnasium (https://github.com/Farama-Foundation/Gymnasium) or Deep Mind Control Suite (https://github.com/google-deepmind/dm_control) environment – provided you comply with all the dependencies for that environment. These examples make use of the package and illustrate how one might use it in their own environments.

`example_training_loops.py` takes in hyperparameters that allow you to customise the training run environment – OpenAI or DMCS – and the RL algorithm. Use `python3 example_training_loops.py -h` for help on what parameters are available for customisation.

An example is found below for running on the OpenAI and DMCS environments with TD3:
```
python3 example_training_loops.py openai --task HalfCheetah-v4 TD3

python3 example_training_loops.py dmcs --domain ball_in_cup --task catch TD3
```

### Data Outputs
All data from a training run is saved into '~/cares_rl_logs'. A folder will be created for each training run named as 'ALGORITHM-TASK-YY_MM_DD:HH:MM:SS', e.g. 'TD3-HalfCheetah-v4-23_10_11_08:47:22'. This folder will contain the following directories and information saved during the training session:

```
ALGORITHM-TASK-YY_MM_DD:HH:MM:SS/
├─ config.yml
├─ data
| ├─ train.csv
| ├─ eval.csv
├─ figures
| ├─ eval.png
| ├─ train.png
├─ models
| ├─ model.pht
| ├─ CHECKPOINT_N.pht
| ├─ ...
├─ videos
| ├─ STEP.mp4
| ├─ ...
```

### Plotting
The plotting utility will plot the data saved during a training session. An example of how to plot the data from one or multiple training sessions together is shown below. Running `python3 plotter.py -h` will provide details on the plotting parameters.

```
python3 plotter.py -s ~/cares_rl_logs -d ~/cares_rl_logs/ALGORITHM-TASK-YY_MM_DD:HH:MM:SS -w 20
```

## Package Structure

180 changes: 180 additions & 0 deletions cares_reinforcement_learning/util/EnvironmentFactory.py
@@ -0,0 +1,180 @@
import logging

import cv2

import gym
from gym import spaces

from dm_control import suite

import numpy as np
from collections import deque

# from typing import override
from functools import cached_property

class EnvironmentFactory:
    def __init__(self) -> None:
        pass

    def create_environment(self, gym_environment, args):
        logging.info(f"Training Environment: {gym_environment}")
        if gym_environment == 'dmcs':
            env = DMCSImage(args=args) if args['image_observation'] else DMCS(args=args)
        elif gym_environment == "openai":
            env = OpenAIGym(args=args)
        else:
            raise ValueError(f"Unknown environment: {gym_environment}")
        return env

class OpenAIGym:
    def __init__(self, args) -> None:
        logging.info(f"Training task {args['task']}")
        self.env = gym.make(args["task"], render_mode="rgb_array")
        self.set_seed(args['seed'])

    @cached_property
    def max_action_value(self):
        return self.env.action_space.high[0]

    @cached_property
    def min_action_value(self):
        return self.env.action_space.low[0]

    @cached_property
    def observation_space(self):
        return self.env.observation_space.shape[0]

    @cached_property
    def action_num(self):
        if isinstance(self.env.action_space, spaces.Box):
            action_num = self.env.action_space.shape[0]
        elif isinstance(self.env.action_space, spaces.Discrete):
            action_num = self.env.action_space.n
        else:
            raise ValueError(f"Unhandled action space type: {type(self.env.action_space)}")
        return action_num

    def set_seed(self, seed):
        self.env.action_space.seed(seed)

    def reset(self):
        state, _ = self.env.reset()
        return state

    def step(self, action):
        state, reward, done, truncated, _ = self.env.step(action)
        return state, reward, done, truncated

    def grab_frame(self):
        frame = self.env.render()
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)  # convert RGB render to BGR for use with OpenCV
        return frame

class OpenAIGymImage(OpenAIGym):
    def __init__(self, args, k=3):
        self.k = k  # number of frames to be stacked
        self.frames_stacked = deque([], maxlen=k)

        super().__init__(args=args)

    # @override
    @property
    def observation_space(self):
        raise NotImplementedError("Not Implemented Yet")

    # @override
    def reset(self):
        _ = self.env.reset()
        frame = self.env.render()                     # --> shape = (H, W, 3)
        frame = cv2.resize(frame, (84, 84))           # resize to match the 3x84x84 image observation
        frame = np.moveaxis(frame, -1, 0)             # --> shape = (3, 84, 84)
        for _ in range(self.k):
            self.frames_stacked.append(frame)
        stacked_frames = np.concatenate(list(self.frames_stacked), axis=0)  # --> shape = (9, 84, 84)
        return stacked_frames

    # @override
    def step(self, action):
        _, reward, done, truncated, _ = self.env.step(action)
        frame = self.env.render()
        frame = cv2.resize(frame, (84, 84))
        frame = np.moveaxis(frame, -1, 0)
        self.frames_stacked.append(frame)
        stacked_frames = np.concatenate(list(self.frames_stacked), axis=0)
        return stacked_frames, reward, done, truncated

class DMCS:
    def __init__(self, args) -> None:
        logging.info(f"Training on Domain {args['domain']}")
        logging.info(f"Training with Task {args['task']}")

        self.domain = args['domain']
        self.task = args['task']
        self.env = suite.load(self.domain, self.task, task_kwargs={'random': args['seed']})

    @cached_property
    def min_action_value(self):
        return self.env.action_spec().minimum[0]

    @cached_property
    def max_action_value(self):
        return self.env.action_spec().maximum[0]

    @cached_property
    def observation_space(self):
        time_step = self.env.reset()
        observation = np.hstack(list(time_step.observation.values()))  # e.g. position, orientation, joint_angles
        return len(observation)

    @cached_property
    def action_num(self):
        return self.env.action_spec().shape[0]

    def set_seed(self, seed):
        self.env = suite.load(self.domain, self.task, task_kwargs={'random': seed})

    def reset(self):
        time_step = self.env.reset()
        observation = np.hstack(list(time_step.observation.values()))  # e.g. position, orientation, joint_angles
        return observation

    def step(self, action):
        time_step = self.env.step(action)
        state, reward, done = np.hstack(list(time_step.observation.values())), time_step.reward, time_step.last()
        return state, reward, done, False  # for consistency with OpenAI Gym, add False for truncated

    def grab_frame(self):
        frame = self.env.physics.render(camera_id=0, height=240, width=300)
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)  # convert RGB render to BGR for use with OpenCV
        return frame

# TODO parameterise the observation size 3x84x84
class DMCSImage(DMCS):
    def __init__(self, args, k=3):
        self.k = k  # number of frames to be stacked
        self.frames_stacked = deque([], maxlen=k)

        super().__init__(args=args)

    # @override
    @property
    def observation_space(self):
        raise NotImplementedError("Not Implemented Yet")

    # @override
    def reset(self):
        _ = self.env.reset()
        frame = self.env.physics.render(84, 84, camera_id=0)  # --> shape = (84, 84, 3)
        frame = np.moveaxis(frame, -1, 0)                     # --> shape = (3, 84, 84)
        for _ in range(self.k):
            self.frames_stacked.append(frame)
        stacked_frames = np.concatenate(list(self.frames_stacked), axis=0)  # --> shape = (9, 84, 84)
        return stacked_frames

    # @override
    def step(self, action):
        time_step = self.env.step(action)
        reward, done = time_step.reward, time_step.last()
        frame = self.env.physics.render(84, 84, camera_id=0)
        frame = np.moveaxis(frame, -1, 0)
        self.frames_stacked.append(frame)
        stacked_frames = np.concatenate(list(self.frames_stacked), axis=0)
        return stacked_frames, reward, done, False  # for consistency with OpenAI Gym, add False for truncated
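
For reference, a minimal sketch of how `EnvironmentFactory` might be used outside of `example_training_loops.py`; the `args` keys and values shown are inferred from the factory and wrapper code above and are purely illustrative:
```
from cares_reinforcement_learning.util import EnvironmentFactory

# Illustrative args dict - keys inferred from the factory/wrapper code above
args = {
    "task": "HalfCheetah-v4",
    "seed": 571,
    "image_observation": False,
}

env_factory = EnvironmentFactory()
env = env_factory.create_environment("openai", args)

state = env.reset()
print(env.observation_space, env.action_num, env.max_action_value)
```
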
20 changes: 18 additions & 2 deletions cares_reinforcement_learning/util/Record.py
@@ -1,5 +1,6 @@
import os
import logging
import cv2

import pandas as pd

@@ -36,6 +37,18 @@ def __init__(self, glob_log_dir=None, log_dir=None, network=None, config=None) -
        with open(f'{self.directory}/config.yml', 'w') as outfile:
            yaml.dump(config, outfile, default_flow_style=False)

    def start_video(self, file_name, frame):
        fps = 30
        video_name = f"{self.directory}/videos/{file_name}.mp4"
        height, width, channels = frame.shape
        self.video = cv2.VideoWriter(video_name, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

    def stop_video(self):
        self.video.release()

    def log_video(self, frame):
        self.video.write(frame)

    def log_info(self, info, display=False):
        self.info_data = pd.concat([self.info_data, pd.DataFrame([info])], ignore_index=True)
        self.save_data(self.info_data, "info", info, display=display)
@@ -70,7 +83,7 @@ def save_data(self, data_frame, filename, logs, display=True):
        string = '| ' + string + ' |'

        if display:
            logging.info(string)

    def save(self):
        logging.info(f"Saving final outputs")
@@ -97,4 +110,7 @@ def __initialise_directories(self):
            os.mkdir(f'{self.directory}/models')

        if not os.path.exists(f'{self.directory}/figures'):
            os.mkdir(f'{self.directory}/figures')

        if not os.path.exists(f'{self.directory}/videos'):
            os.mkdir(f'{self.directory}/videos')
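
As a rough sketch of how these new video helpers might be combined with an environment's `grab_frame()` during evaluation; the `record` and `env` objects are assumed to already exist, and the random policy below is only a stand-in for the agent:
```
# Assumes `record` is an existing Record instance and `env` an environment
# created by EnvironmentFactory; a random policy stands in for the agent.
record.start_video("10000", env.grab_frame())

state = env.reset()
done = truncated = False
while not (done or truncated):
    action = env.env.action_space.sample()  # placeholder action selection
    state, reward, done, truncated = env.step(action)
    record.log_video(env.grab_frame())

record.stop_video()
```
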
1 change: 1 addition & 0 deletions cares_reinforcement_learning/util/__init__.py
@@ -1,3 +1,4 @@
from .NetworkFactory import NetworkFactory
from .Record import Record
from .EnvironmentFactory import EnvironmentFactory
