From 3dc9ab5b5412299250c2f51670de7ae50a49ddc3 Mon Sep 17 00:00:00 2001
From: Luuk van Oijen <lazyluuk.channel@gmail.com>
Date: Wed, 17 Jan 2024 13:50:52 +0100
Subject: [PATCH] Update DQN to heap-allocate state arrays

---
 src/dqn.rs | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/src/dqn.rs b/src/dqn.rs
index d668cf0..4a0b85d 100644
--- a/src/dqn.rs
+++ b/src/dqn.rs
@@ -133,9 +133,9 @@ where
 
     pub fn train_dqn(
         &mut self,
-        states: [[f32; STATE_SIZE]; BATCH],
+        states: Box<[[f32; STATE_SIZE]; BATCH]>,
         actions: [[f32; ACTION_SIZE]; BATCH],
-        next_states: [[f32; STATE_SIZE]; BATCH],
+        next_states: Box<[[f32; STATE_SIZE]; BATCH]>,
         rewards: [f32; BATCH],
         dones: [bool; BATCH],
     ) {
@@ -148,7 +148,7 @@ where
 
         // Convert to tensors and normalize the states for better training
         let states: Tensor<Rank2<BATCH, STATE_SIZE>, f32, _> =
-            self.dev.tensor(states).normalize::<Axis<1>>(0.001);
+            self.dev.tensor(*states).normalize::<Axis<1>>(0.001);
 
         // Convert actions to tensors and get the max action for each batch
         let actions: Tensor<Rank1<BATCH>, usize, _> = self.dev.tensor(actions.map(|a| {
@@ -165,7 +165,7 @@ where
 
         // Convert to tensors and normalize the states for better training
         let next_states: Tensor<Rank2<BATCH, STATE_SIZE>, f32, _> =
-            self.dev.tensor(next_states).normalize::<Axis<1>>(0.001);
+            self.dev.tensor(*next_states).normalize::<Axis<1>>(0.001);
 
         // Compute the estimated Q-value for the action
         for _step in 0..20 {
@@ -203,9 +203,21 @@ where
     ) {
         loop {
             // Initialize batch
-            let mut states: [[f32; STATE_SIZE]; BATCH] = [[0.0; STATE_SIZE]; BATCH];
+            let mut states: Box<[[f32; STATE_SIZE]; BATCH]> = {
+                let b = vec![0.0; STATE_SIZE].into_boxed_slice();
+                let big = unsafe { Box::from_raw(Box::into_raw(b) as *mut [f32; STATE_SIZE]) };
+
+                let b = vec![*big; BATCH].into_boxed_slice();
+                unsafe { Box::from_raw(Box::into_raw(b) as *mut [[f32; STATE_SIZE]; BATCH]) }
+            };
             let mut actions: [[f32; ACTION_SIZE]; BATCH] = [[0.0; ACTION_SIZE]; BATCH];
-            let mut next_states: [[f32; STATE_SIZE]; BATCH] = [[0.0; STATE_SIZE]; BATCH];
+            let mut next_states: Box<[[f32; STATE_SIZE]; BATCH]> = {
+                let b = vec![0.0; STATE_SIZE].into_boxed_slice();
+                let big = unsafe { Box::from_raw(Box::into_raw(b) as *mut [f32; STATE_SIZE]) };
+
+                let b = vec![*big; BATCH].into_boxed_slice();
+                unsafe { Box::from_raw(Box::into_raw(b) as *mut [[f32; STATE_SIZE]; BATCH]) }
+            };
             let mut rewards: [f32; BATCH] = [0.0; BATCH];
             let mut dones = [false; BATCH];