yura-hb
diff --git a/diploma_thesis/agents/utils/memory/memory.py
+7 b/diploma_thesis/agents/utils/memory/memory.py
+7
diff --git a/diploma_thesis/agents/utils/memory/replay_memory.py
+2-2 b/diploma_thesis/agents/utils/memory/replay_memory.py
+2-2
diff --git a/diploma_thesis/agents/utils/nn/layers/linear.py
+10-9 b/diploma_thesis/agents/utils/nn/layers/linear.py
+10-9
diff --git a/diploma_thesis/agents/utils/rl/ddqn.py
+1-1 b/diploma_thesis/agents/utils/rl/ddqn.py
+1-1
@@ -50,6 +50,7 @@ def is_filled(self):
 class NotReadyException(BaseException):
     pass
 
+stores = 0
 
 class Memory(Generic[_Configuration], metaclass=ABCMeta):
 
@@ -63,6 +64,12 @@ def store(self, records: List[Record] | List[List[Record]]):
 
             self.buffer.extend(records)
         else:
+            import logging  # function-scope import: this file's import block is not visible in the patch
+            global stores  # module-level counter, incremented once per call that takes this branch
+
+            stores += 1
+
+            logging.getLogger(__name__).debug('Stores %d %d', stores, len(records))  # was an unconditional print()
             self.buffer.extend(records)
 
     def sample(self, return_info: bool = False) -> List[Record]:
 
@@ -1,4 +1,4 @@
-from torchrl.data import TensorDictReplayBuffer
+from torchrl.data import TensorDictReplayBuffer, SamplerWithoutReplacement
 
 from .memory import *
 from .memory import Configuration as MemoryConfiguration
@@ -23,7 +23,7 @@ def from_cli(cls, parameters: Dict):
 class ReplayMemory(Memory[Configuration]):
 
     def __make_buffer__(self) -> ReplayBuffer | TensorDictReplayBuffer:
-        sampler = self.configuration.sampler.make() if self.configuration.sampler else RandomSampler()
+        sampler = self.configuration.sampler.make() if self.configuration.sampler else SamplerWithoutReplacement()
         cls = None
 
         params = dict(
 
@@ -38,15 +38,16 @@ def __init__(self,
         self.__build__()
 
     def initialize_parameters(self, input) -> None:
-        self.linear.initialize_parameters(input)
-
-        if isinstance(self.linear, torch.nn.Linear):
-            match self.initialization:
-                case Initialization.orthogonal.value:
-                    torch.nn.init.orthogonal_(self.linear.weight)
-                    torch.nn.init.zeros_(self.linear.bias)
-                case _:
-                    pass
+        if not self.linear.has_uninitialized_params():
+            return  # wrapped layer reports no uninitialized params: leave it untouched
+
+        self.linear.initialize_parameters(input)
+
+        if not isinstance(self.linear, torch.nn.Linear):
+            return  # the custom scheme below is only applied to torch.nn.Linear instances
+        if self.initialization == Initialization.orthogonal.value:  # any other value: nothing extra is applied
+            torch.nn.init.orthogonal_(self.linear.weight)
+            torch.nn.init.zeros_(self.linear.bias)
 
     def forward(self, batch: torch.FloatTensor) -> torch.FloatTensor:
         batch = self.linear(batch)
 
@@ -14,6 +14,6 @@ def estimate_q(self, model: Policy, batch: Record | tensordict.TensorDictBase):
 
         target = self.__get_action_values__(self.target_model, batch.next_state, best_actions)
 
-        q = batch.reward.squeeze() + self.return_estimator.discount_factor * target * (1 - batch.done.squeeze().int())
+        q = batch.reward.squeeze() + self.return_estimator.discount_factor * target # NOTE(review): the mask `* (1 - batch.done.squeeze().int())` is commented out, so the discounted target is added even when `done` is set - confirm this is intentional
 
         return q