Skip to content

Commit

Permalink
Saving Memory Buffer (#196)
Browse files Browse the repository at this point in the history
* Adding saving logic to Memory buffer - using pickle

* Functional pytest - saving/loading memory buffer correctly
  • Loading branch information
beardyFace authored Dec 3, 2024
1 parent c8aa296 commit e29e7fd
Show file tree
Hide file tree
Showing 3 changed files with 141 additions and 0 deletions.
14 changes: 14 additions & 0 deletions cares_reinforcement_learning/memory/memory_buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""

import pickle
import random

import numpy as np
Expand Down Expand Up @@ -422,3 +423,16 @@ def clear(self) -> None:
self.sum_tree = SumTree(self.max_capacity)
self.max_priority = self.min_priority
self.beta = self.init_beta

def save(self, filepath: str, file_name: str) -> None:
    """
    Pickle this object to ``{filepath}/{file_name}.pkl``.

    The target directory is created when it does not exist yet. The data is
    first written to a temporary sibling file and then moved over the
    destination, so a save that is interrupted part-way does not replace an
    existing pickle with a truncated one.

    Args:
        filepath: Directory the pickle file is written into.
        file_name: Name of the file, without the ``.pkl`` extension.

    Raises:
        OSError: If the directory or file cannot be created or written.
    """
    import os  # local import: only this method needs it

    if filepath:
        os.makedirs(filepath, exist_ok=True)

    target = f"{filepath}/{file_name}.pkl"
    partial = f"{target}.tmp"
    try:
        with open(partial, "wb") as f:
            pickle.dump(self, f)
        # Swap the finished file into place in a single step.
        os.replace(partial, target)
    finally:
        # Only still present when dumping or replacing failed.
        if os.path.exists(partial):
            os.remove(partial)

@classmethod
def load(cls, file_path: str, file_name: str):
    """
    Restore a pickled object from ``{file_path}/{file_name}.pkl``.

    The object stored in the file is returned as-is; ``cls`` is not consulted.

    NOTE(review): unpickling can execute arbitrary code, so this should only
    be pointed at files from a trusted source.

    Args:
        file_path: Directory containing the pickle file.
        file_name: Name of the file, without the ``.pkl`` extension.

    Returns:
        The deserialized object.
    """
    pickle_path = f"{file_path}/{file_name}.pkl"
    with open(pickle_path, "rb") as handle:
        return pickle.load(handle)
15 changes: 15 additions & 0 deletions cares_reinforcement_learning/util/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from torch import nn

import cares_reinforcement_learning.util.plotter as plt
from cares_reinforcement_learning.memory import MemoryBuffer
from cares_reinforcement_learning.util.configurations import SubscriptableClass


Expand All @@ -30,6 +31,7 @@ def __init__(
algorithm: str,
task: str,
agent: nn.Module | None = None,
memory_buffer: MemoryBuffer | None = None,
) -> None:

self.best_reward = float("-inf")
Expand All @@ -49,6 +51,8 @@ def __init__(

self.video: cv2.VideoWriter = None

self.memory_buffer = memory_buffer

self.log_count = 0

self.__initialise_base_directory()
Expand Down Expand Up @@ -94,6 +98,12 @@ def start_video(self, file_name: str, frame, fps=30):
def stop_video(self) -> None:
    """
    Release the current video writer, if one has been set.

    ``self.video`` is ``None`` until a writer is assigned, so calling this
    without an active writer is a no-op instead of an ``AttributeError``.
    """
    if self.video is None:
        return
    self.video.release()

def save_memory(self):
    """
    Save the attached memory buffer into the ``memory`` folder of the
    current sub directory, under the file name ``memory``.

    Does nothing when no memory buffer is attached.
    """
    buffer = self.memory_buffer
    if buffer is None:
        return

    memory_dir = f"{self.current_sub_directory}/memory"
    buffer.save(filepath=memory_dir, file_name="memory")

def save_agent(self, file_name: str, folder_name: str) -> None:
if self.agent is not None:
self.agent.save_models(
Expand Down Expand Up @@ -133,6 +143,8 @@ def log_train(self, display: bool = False, **logs) -> None:
)
self._save_data(self.train_data, "train.csv", logs, display=display)

self.save_memory()

plt.plot_train(
self.train_data,
f"Training-{self.algorithm}-{self.task}",
Expand Down Expand Up @@ -210,6 +222,9 @@ def __initialise_sub_directory(self) -> None:
if not os.path.exists(f"{self.current_sub_directory}/videos"):
os.makedirs(f"{self.current_sub_directory}/videos")

if not os.path.exists(f"{self.current_sub_directory}/memory"):
os.makedirs(f"{self.current_sub_directory}/memory")

@staticmethod
def create_base_directory(
gym: str,
Expand Down
112 changes: 112 additions & 0 deletions tests/memory/test_save.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import os
from pathlib import Path

import numpy as np
import torch
from memory import memory_buffer, memory_buffer_1e6

from cares_reinforcement_learning.memory import MemoryBuffer


def _images_the_same(image_one, image_two):
if image_one is None and image_two is None:
return True

return image_one.shape == image_two.shape and not (
np.bitwise_xor(image_one, image_two).any()
)


def _compare_buffer(memory, loaded_memory, experience_size, image_state=False):

assert len(memory) == len(loaded_memory)

for i in range(experience_size):
a = memory.memory_buffers[i]
b = loaded_memory.memory_buffers[i]
if i == 0 and image_state:
for image_a, image_b in zip(a, b):
assert _images_the_same(image_a, image_b)
else:
assert np.array_equal(a, b)

assert memory.max_capacity == loaded_memory.max_capacity

assert memory.current_size == loaded_memory.current_size

assert memory.tree_pointer == loaded_memory.tree_pointer

assert memory.init_beta == loaded_memory.init_beta

assert memory.beta == loaded_memory.beta

assert memory.d_beta == loaded_memory.d_beta

assert memory.min_priority == loaded_memory.min_priority

assert memory.max_priority == loaded_memory.max_priority

sum_tree_levels = memory.sum_tree.levels
loaded_sum_tree_levels = loaded_memory.sum_tree.levels

assert len(sum_tree_levels) == len(loaded_sum_tree_levels)

for i, _ in enumerate(sum_tree_levels):
a = sum_tree_levels[i]
b = loaded_sum_tree_levels[i]
assert np.array_equal(a, b)

inverse_tree_levels = memory.inverse_tree.levels
loaded_inverse_tree_levels = loaded_memory.inverse_tree.levels

assert len(inverse_tree_levels) == len(loaded_inverse_tree_levels)

for i, _ in enumerate(inverse_tree_levels):
a = inverse_tree_levels[i]
b = loaded_inverse_tree_levels[i]
assert np.array_equal(a, b)


def test_save_load_image(memory_buffer_1e6):
    """
    Round-trip a buffer holding image observations through save/load and
    check that the loaded buffer matches the original.

    The pickle is written into a temporary directory that is removed when
    the test finishes, so the test leaves nothing behind in the user's home
    directory and does not share a file path with other tests.
    """
    import tempfile  # local import: only needed for the scratch directory

    data_size = 10

    observation_size = (3, 84, 84)

    experience = []
    for i in range(data_size):
        test_image = np.random.randint(0, 255, size=observation_size)
        experience = [test_image, i, i, i, i % 2, i]
        memory_buffer_1e6.add(*experience)

    with tempfile.TemporaryDirectory() as file_path:
        memory_buffer_1e6.save(file_path, "memory_buffer")

        loaded_memory = MemoryBuffer.load(file_path, "memory_buffer")

    _compare_buffer(memory_buffer_1e6, loaded_memory, len(experience), image_state=True)


def test_save_load_vector(memory_buffer_1e6):
    """
    Round-trip a buffer filled with one million scalar experiences through
    save/load and check that the loaded buffer matches the original.

    The pickle is written into a temporary directory that is removed when
    the test finishes, so the large file is not left behind in the user's
    home directory and does not share a file path with other tests.
    """
    import tempfile  # local import: only needed for the scratch directory

    data_size = 1000000

    experience = []
    for i in range(data_size):
        experience = [i, i, i, i, i % 2, i]
        memory_buffer_1e6.add(*experience)

    with tempfile.TemporaryDirectory() as file_path:
        memory_buffer_1e6.save(file_path, "memory_buffer")

        loaded_memory = MemoryBuffer.load(file_path, "memory_buffer")

    _compare_buffer(memory_buffer_1e6, loaded_memory, len(experience))

0 comments on commit e29e7fd

Please sign in to comment.