Commit

Merge pull request #82 from UoA-CARES/dev/dmcs
Dev/dmcs
dvalenciar authored Oct 11, 2023
2 parents 0a48ba3 + 8618e92 commit 0b94ca4
Showing 10 changed files with 448 additions and 127 deletions.
44 changes: 38 additions & 6 deletions README.md
@@ -8,24 +8,56 @@ The CARES reinforcement learning bed used as the foundation for RL related projects
Consult the repository [wiki](https://github.com/UoA-CARES/cares_reinforcement_learning/wiki) for a guide on how to use the package

## Installation Instructions
If you want to utilise the GPU with Pytorch install CUDA first - https://developer.nvidia.com/cuda-toolkit

Install Pytorch following the instructions here - https://pytorch.org/get-started/locally/

`git clone` the repository into your desired directory on your local machine

Run `pip3 install -r requirements.txt` in the **root directory** of the package

To make the module **globally accessible** in your working environment run `pip3 install --editable .` in the **project root**
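
Putting the steps together, a typical install might look like the following (a sketch that assumes the repository URL of this project and a Python 3 environment):
```
git clone https://github.com/UoA-CARES/cares_reinforcement_learning.git
cd cares_reinforcement_learning
pip3 install -r requirements.txt
pip3 install --editable .
```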

## Running an Example
This repository includes a script that allows you to run any OpenAI Gymnasium (https://github.com/Farama-Foundation/Gymnasium) or Deep Mind Control Suite (https://github.com/google-deepmind/dm_control) environment – provided you comply with all the dependencies for that environment. These examples make use of the package and illustrate how one might use it in their own environments.

`example_training_loops.py` takes in hyperparameters that allow you to customise the training run environment – OpenAI or DMCS – and the RL algorithm. Use `python3 example_training_loops.py -h` for help on what parameters are available for customisation.

An example is found below for running on the OpenAI and DMCS environments with TD3:
```
python3 example_training_loops.py openai --task HalfCheetah-v4 TD3

python3 example_training_loops.py dmcs --domain ball_in_cup --task catch TD3
```

### Data Outputs
All data from a training run is saved into '~/cares_rl_logs'. A folder will be created for each training run named as 'ALGORITHM-TASK-YY_MM_DD:HH:MM:SS', e.g. 'TD3-HalfCheetah-v4-23_10_11_08:47:22'. This folder will contain the following directories and information saved during the training session:

```
ALGORITHM-TASK-YY_MM_DD:HH:MM:SS/
├─ config.yml
├─ data
| ├─ train.csv
| ├─ eval.csv
├─ figures
| ├─ eval.png
| ├─ train.png
├─ models
| ├─ model.pht
| ├─ CHECKPOINT_N.pht
| ├─ ...
├─ videos
| ├─ STEP.mp4
| ├─ ...
```

### Plotting
The plotting utility will plot the data saved during a training session. An example of how to plot the data from one or multiple training sessions together is shown below. Running `python3 plotter.py -h` will provide details on the plotting parameters.

```
python3 plotter.py -s ~/cares_rl_logs -d ~/cares_rl_logs/ALGORITHM-TASK-YY_MM_DD:HH:MM:SS -w 20
```

## Package Structure

180 changes: 180 additions & 0 deletions cares_reinforcement_learning/util/EnvironmentFactory.py
@@ -0,0 +1,180 @@
import logging

import cv2

import gym
from gym import spaces

from dm_control import suite

import numpy as np
from collections import deque

# from typing import override
from functools import cached_property

class EnvironmentFactory:
    def __init__(self) -> None:
        pass

    def create_environment(self, gym_environment, args):
        logging.info(f"Training Environment: {gym_environment}")
        if gym_environment == 'dmcs':
            env = DMCSImage(args=args) if args['image_observation'] else DMCS(args=args)
        elif gym_environment == "openai":
            env = OpenAIGym(args=args)
        else:
            raise ValueError(f"Unknown environment: {gym_environment}")
        return env

class OpenAIGym:
    def __init__(self, args) -> None:
        logging.info(f"Training task {args['task']}")
        self.env = gym.make(args["task"], render_mode="rgb_array")
        self.set_seed(args['seed'])

    @cached_property
    def max_action_value(self):
        return self.env.action_space.high[0]

    @cached_property
    def min_action_value(self):
        return self.env.action_space.low[0]

    @cached_property
    def observation_space(self):
        return self.env.observation_space.shape[0]

    @cached_property
    def action_num(self):
        if isinstance(self.env.action_space, spaces.Box):
            action_num = self.env.action_space.shape[0]
        elif isinstance(self.env.action_space, spaces.Discrete):
            action_num = self.env.action_space.n
        else:
            raise ValueError(f"Unhandled action space type: {type(self.env.action_space)}")
        return action_num

    def set_seed(self, seed):
        self.env.action_space.seed(seed)

    def reset(self):
        state, _ = self.env.reset()
        return state

    def step(self, action):
        state, reward, done, truncated, _ = self.env.step(action)
        return state, reward, done, truncated

    def grab_frame(self):
        frame = self.env.render()
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)  # convert RGB render to BGR for use with OpenCV
        return frame

class OpenAIGymImage(OpenAIGym):
    def __init__(self, args, k=3):
        self.k = k  # number of frames to be stacked
        self.frames_stacked = deque([], maxlen=k)

        super().__init__(args=args)

    # @override
    @property
    def observation_space(self):
        raise NotImplementedError("Not Implemented Yet")

    # @override
    def reset(self):
        _ = self.env.reset()
        frame = self.env.render()                     # --> shape = (H, W, 3)
        frame = cv2.resize(frame, (84, 84))           # resize to match the 3x84x84 image observation
        frame = np.moveaxis(frame, -1, 0)             # --> shape = (3, 84, 84)
        for _ in range(self.k):
            self.frames_stacked.append(frame)
        stacked_frames = np.concatenate(list(self.frames_stacked), axis=0)  # --> shape = (9, 84, 84)
        return stacked_frames

    # @override
    def step(self, action):
        _, reward, done, truncated, _ = self.env.step(action)
        frame = self.env.render()
        frame = cv2.resize(frame, (84, 84))
        frame = np.moveaxis(frame, -1, 0)
        self.frames_stacked.append(frame)
        stacked_frames = np.concatenate(list(self.frames_stacked), axis=0)
        return stacked_frames, reward, done, truncated

class DMCS:
    def __init__(self, args) -> None:
        logging.info(f"Training on Domain {args['domain']}")
        logging.info(f"Training with Task {args['task']}")

        self.domain = args['domain']
        self.task = args['task']
        self.env = suite.load(self.domain, self.task, task_kwargs={'random': args['seed']})

    @cached_property
    def min_action_value(self):
        return self.env.action_spec().minimum[0]

    @cached_property
    def max_action_value(self):
        return self.env.action_spec().maximum[0]

    @cached_property
    def observation_space(self):
        time_step = self.env.reset()
        observation = np.hstack(list(time_step.observation.values()))  # e.g. position, orientation, joint_angles
        return len(observation)

    @cached_property
    def action_num(self):
        return self.env.action_spec().shape[0]

    def set_seed(self, seed):
        self.env = suite.load(self.domain, self.task, task_kwargs={'random': seed})

    def reset(self):
        time_step = self.env.reset()
        observation = np.hstack(list(time_step.observation.values()))  # e.g. position, orientation, joint_angles
        return observation

    def step(self, action):
        time_step = self.env.step(action)
        state, reward, done = np.hstack(list(time_step.observation.values())), time_step.reward, time_step.last()
        return state, reward, done, False  # for consistency with OpenAI Gym, add False for truncated

    def grab_frame(self):
        frame = self.env.physics.render(camera_id=0, height=240, width=300)
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)  # convert RGB render to BGR for use with OpenCV
        return frame

# TODO parameterise the observation size 3x84x84
class DMCSImage(DMCS):
    def __init__(self, args, k=3):
        self.k = k  # number of frames to be stacked
        self.frames_stacked = deque([], maxlen=k)

        super().__init__(args=args)

    # @override
    @property
    def observation_space(self):
        raise NotImplementedError("Not Implemented Yet")

    # @override
    def reset(self):
        _ = self.env.reset()
        frame = self.env.physics.render(84, 84, camera_id=0)  # --> shape = (84, 84, 3)
        frame = np.moveaxis(frame, -1, 0)                     # --> shape = (3, 84, 84)
        for _ in range(self.k):
            self.frames_stacked.append(frame)
        stacked_frames = np.concatenate(list(self.frames_stacked), axis=0)  # --> shape = (9, 84, 84)
        return stacked_frames

    # @override
    def step(self, action):
        time_step = self.env.step(action)
        reward, done = time_step.reward, time_step.last()
        frame = self.env.physics.render(84, 84, camera_id=0)
        frame = np.moveaxis(frame, -1, 0)
        self.frames_stacked.append(frame)
        stacked_frames = np.concatenate(list(self.frames_stacked), axis=0)
        return stacked_frames, reward, done, False  # for consistency with OpenAI Gym, add False for truncated
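
For reference, a minimal sketch of how `EnvironmentFactory` might be used outside of `example_training_loops.py`; the `args` keys and values shown are inferred from the factory and wrapper code above and are purely illustrative:
```
from cares_reinforcement_learning.util import EnvironmentFactory

# Illustrative args dict - keys inferred from the factory/wrapper code above
args = {
    "task": "HalfCheetah-v4",
    "seed": 571,
    "image_observation": False,
}

env_factory = EnvironmentFactory()
env = env_factory.create_environment("openai", args)

state = env.reset()
print(env.observation_space, env.action_num, env.max_action_value)
```
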
20 changes: 18 additions & 2 deletions cares_reinforcement_learning/util/Record.py
@@ -1,5 +1,6 @@
import os
import logging
import cv2

import pandas as pd

@@ -36,6 +37,18 @@ def __init__(self, glob_log_dir=None, log_dir=None, network=None, config=None) -
        with open(f'{self.directory}/config.yml', 'w') as outfile:
            yaml.dump(config, outfile, default_flow_style=False)

    def start_video(self, file_name, frame):
        fps = 30
        video_name = f"{self.directory}/videos/{file_name}.mp4"
        height, width, channels = frame.shape
        self.video = cv2.VideoWriter(video_name, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

    def stop_video(self):
        self.video.release()

    def log_video(self, frame):
        self.video.write(frame)

    def log_info(self, info, display=False):
        self.info_data = pd.concat([self.info_data, pd.DataFrame([info])], ignore_index=True)
        self.save_data(self.info_data, "info", info, display=display)
@@ -70,7 +83,7 @@ def save_data(self, data_frame, filename, logs, display=True):
        string = '| ' + string + ' |'

        if display:
            logging.info(string)

    def save(self):
        logging.info(f"Saving final outputs")
@@ -97,4 +110,7 @@ def __initialise_directories(self):
            os.mkdir(f'{self.directory}/models')

        if not os.path.exists(f'{self.directory}/figures'):
            os.mkdir(f'{self.directory}/figures')

        if not os.path.exists(f'{self.directory}/videos'):
            os.mkdir(f'{self.directory}/videos')
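
As a rough sketch of how these new video helpers might be combined with an environment's `grab_frame()` during evaluation; the `record` and `env` objects are assumed to already exist, and the random policy below is only a stand-in for the agent:
```
# Assumes `record` is an existing Record instance and `env` an environment
# created by EnvironmentFactory; a random policy stands in for the agent.
record.start_video("10000", env.grab_frame())

state = env.reset()
done = truncated = False
while not (done or truncated):
    action = env.env.action_space.sample()  # placeholder action selection
    state, reward, done, truncated = env.step(action)
    record.log_video(env.grab_frame())

record.stop_video()
```
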
1 change: 1 addition & 0 deletions cares_reinforcement_learning/util/__init__.py
@@ -1,3 +1,4 @@
from .NetworkFactory import NetworkFactory
from .Record import Record
from .EnvironmentFactory import EnvironmentFactory
