From 4ea5e2e53f7fe5e3c7ab4983dedbab5a7b984e88 Mon Sep 17 00:00:00 2001
From: Ole-Christoffer Granmo <ole.granmo@uia.no>
Date: Sun, 25 Aug 2024 23:22:09 +0200
Subject: [PATCH] Graph Tsetlin Machine

---
 .../CIFAR2Demo3x3LiteralBudget.py             |  28 +-
 .../SequenceCountInterpretabilityDemo.py      |   2 +-
 .../SequenceInterpretabilityDemo.py           |  14 +-
 tmu/clause_bank/base_clause_bank.py           |  13 +-
 tmu/clause_bank/clause_bank.py                |  81 +++-
 tmu/lib/include/ClauseBank.h                  |  12 +
 tmu/lib/src/ClauseBank.c                      | 375 ++++++++++++++----
 tmu/lib/src/Tools.c                           |  51 +--
 .../classification/coalesced_classifier.py    |   4 +
 9 files changed, 412 insertions(+), 168 deletions(-)

diff --git a/examples/classification/CIFAR2Demo3x3LiteralBudget.py b/examples/classification/CIFAR2Demo3x3LiteralBudget.py
index 457745da..4caa7c48 100644
--- a/examples/classification/CIFAR2Demo3x3LiteralBudget.py
+++ b/examples/classification/CIFAR2Demo3x3LiteralBudget.py
@@ -13,6 +13,7 @@
 
 _LOGGER = logging.getLogger(__name__)
 
+logging.basicConfig(level=logging.INFO)
 
 def preprocess_cifar10_data(resolution, animals):
     """
@@ -35,9 +36,9 @@ def preprocess_cifar10_data(resolution, animals):
     # Initialize empty arrays for quantized images
     X_train = np.empty(
         (X_train_org.shape[0], X_train_org.shape[1], X_train_org.shape[2], X_train_org.shape[3], resolution),
-        dtype=np.uint8)
+        dtype=np.uint32)
     X_test = np.empty((X_test_org.shape[0], X_test_org.shape[1], X_test_org.shape[2], X_test_org.shape[3], resolution),
-                      dtype=np.uint8)
+                      dtype=np.uint32)
 
     # Quantize pixel values
     for z in range(resolution):
@@ -73,17 +74,18 @@ def run_ensemble(ensemble_params):
     # Unpack parameters
     args, X_train, Y_train, X_test, Y_test, ensemble = ensemble_params
 
-    T = int(args.clauses * 0.75)
     tm = TMClassifier(
         args.clauses,
-        T,
+        args.T,
         args.s,
         platform=args.platform,
         patch_dim=(args.patch_size, args.patch_size),
         number_of_state_bits_ta=args.number_of_state_bits_ta,
         weighted_clauses=args.weighted_clauses,
         literal_drop_p=args.literal_drop_p,
-        max_included_literals=args.max_included_literals
+        max_included_literals=args.max_included_literals,
+        spatio_temporal=True,
+        depth=args.depth
     )
 
     ensemble_results = metrics(args)
@@ -149,18 +151,20 @@ def main(args):
 
 def default_args(**kwargs):
     parser = argparse.ArgumentParser()
-    parser.add_argument("--max_included_literals", type=int, default=32)
-    parser.add_argument("--clauses", type=int, default=8000)
+    parser.add_argument("--max-included-literals", type=int, default=32)
+    parser.add_argument("--clauses", type=int, default=100)
+    parser.add_argument("--T", type=int, default=750)
     parser.add_argument("--s", type=float, default=10.0)
     parser.add_argument("--platform", type=str, default="GPU")
-    parser.add_argument("--patch_size", type=int, default=3)
+    parser.add_argument("--patch-size", type=int, default=3)
     parser.add_argument("--resolution", type=int, default=8)
-    parser.add_argument("--number_of_state_bits_ta", type=int, default=8)
-    parser.add_argument("--literal_drop_p", type=float, default=0.0)
+    parser.add_argument("--number-of-state-bits-ta", type=int, default=8)
+    parser.add_argument("--literal-drop-p", type=float, default=0.0)
+    parser.add_argument("--depth", type=int, default=1)
     parser.add_argument("--epochs", type=int, default=250)
-    parser.add_argument("--ensembles", type=int, default=5)
+    parser.add_argument("--ensembles", type=int, default=1)
     parser.add_argument("--weighted-clauses", type=bool, default=True)
-    parser.add_argument("--use_multiprocessing", action='store_true', help="Use multiprocessing to run ensembles in parallel")
+    parser.add_argument("--use-multiprocessing", action='store_false', help="Use multiprocessing to run ensembles in parallel")
     args = parser.parse_args()
     for key, value in kwargs.items():
         if key in args.__dict__:
diff --git a/examples/classification/SequenceCountInterpretabilityDemo.py b/examples/classification/SequenceCountInterpretabilityDemo.py
index f976df27..5db92ff5 100644
--- a/examples/classification/SequenceCountInterpretabilityDemo.py
+++ b/examples/classification/SequenceCountInterpretabilityDemo.py
@@ -661,7 +661,7 @@ def default_args(**kwargs):
     parser.add_argument("--platform", default='CPU', type=str)
     parser.add_argument("--T", default=100*2, type=int)
     parser.add_argument("--s", default=1.0, type=float)
-    parser.add_argument("--sequence-length", default=6, type=int)
+    parser.add_argument("--sequence-length", default=10, type=int)
     parser.add_argument("--noise", default=0.01, type=float)
     parser.add_argument("--examples", default=40000, type=int)
     parser.add_argument("--depth", default=2, type=int)
diff --git a/examples/classification/SequenceInterpretabilityDemo.py b/examples/classification/SequenceInterpretabilityDemo.py
index bbfb139f..f6d10da9 100644
--- a/examples/classification/SequenceInterpretabilityDemo.py
+++ b/examples/classification/SequenceInterpretabilityDemo.py
@@ -31,8 +31,8 @@ def main(args):
         position_2 = position_1+1
         position_3 = position_1+2
 
-#        position_2 = np.random.randint(position_1+1, args.sequence_length-1)
-#        position_3 = np.random.randint(position_2+1, args.sequence_length)
+        #position_2 = np.random.randint(position_1+1, args.sequence_length-1)
+        #position_3 = np.random.randint(position_2+1, args.sequence_length)
         
         if Y_train[i] == 0:
             X_train[i,0,position_1,0] = 1
@@ -72,8 +72,8 @@ def main(args):
         position_2 = position_1+1
         position_3 = position_1+2
 
-#        position_2 = np.random.randint(position_1+1, args.sequence_length-1)
-#        position_3 = np.random.randint(position_2+1, args.sequence_length)
+        #position_2 = np.random.randint(position_1+1, args.sequence_length-1)
+        #position_3 = np.random.randint(position_2+1, args.sequence_length)
         
         if Y_test[i] == 0:
             X_test[i,0,position_1,0] = 1
@@ -215,10 +215,10 @@ def default_args(**kwargs):
     parser.add_argument("--platform", default='CPU', type=str)
     parser.add_argument("--T", default=100*2, type=int)
     parser.add_argument("--s", default=1.0, type=float)
-    parser.add_argument("--sequence-length", default=6, type=int)
-    parser.add_argument("--noise", default=0.01, type=float, help="Noisy XOR")
+    parser.add_argument("--sequence-length", default=10, type=int)
+    parser.add_argument("--noise", default=0.0, type=float, help="Noisy XOR")
     parser.add_argument("--examples", default=40000, type=int, help="Noisy XOR")
-    parser.add_argument("--depth", default=2, type=int)
+    parser.add_argument("--depth", default=1, type=int)
     parser.add_argument("--number-of-state-bits-ta", default=10, type=int)
     parser.add_argument("--max-included-literals", default=32, type=int)
 
diff --git a/tmu/clause_bank/base_clause_bank.py b/tmu/clause_bank/base_clause_bank.py
index 4c5c69ad..6fb50d59 100644
--- a/tmu/clause_bank/base_clause_bank.py
+++ b/tmu/clause_bank/base_clause_bank.py
@@ -40,6 +40,8 @@ def __init__(
         self.type_ia_ii_feedback_ratio = type_ia_ii_feedback_ratio
         self.spatio_temporal = spatio_temporal
         self.depth = depth
+        self.hypervector_size = 256
+        self.hypervector_bits = 2
 
         if len(X_shape) == 2:
             self.dim = (X_shape[1], 1, 1)
@@ -53,17 +55,20 @@ def __init__(
         if self.patch_dim is None:
             self.patch_dim = (self.dim[0] * self.dim[1] * self.dim[2], 1)
 
-        self.number_of_features = int(
-            self.patch_dim[0] * self.patch_dim[1] * self.dim[2] + (self.dim[0] - self.patch_dim[0]) + (
-                    self.dim[1] - self.patch_dim[1]))
+        self.number_of_input_features = int(self.patch_dim[0] * self.patch_dim[1] * self.dim[2])
+
+        self.number_of_features = int(self.patch_dim[0] * self.patch_dim[1] * self.dim[2])
 
         self.number_of_patches = int((self.dim[0] - self.patch_dim[0] + 1) * (self.dim[1] - self.patch_dim[1] + 1))
 
         if self.spatio_temporal:
-            self.number_of_features += self.number_of_clauses*4*self.depth;
+            self.number_of_features += self.depth*self.hypervector_size
+
+        self.number_of_input_literals = self.number_of_input_features * 2
 
         self.number_of_literals = self.number_of_features * 2
 
+        self.number_of_input_ta_chunks = int((self.number_of_input_literals - 1) / 32 + 1)
         self.number_of_ta_chunks = int((self.number_of_literals - 1) / 32 + 1)
 
         self.max_included_literals = max_included_literals if max_included_literals else self.number_of_literals
diff --git a/tmu/clause_bank/clause_bank.py b/tmu/clause_bank/clause_bank.py
index 61a9756d..00279e97 100644
--- a/tmu/clause_bank/clause_bank.py
+++ b/tmu/clause_bank/clause_bank.py
@@ -67,6 +67,8 @@ def __init__(
         self.type_ia_feedback_counter = np.zeros(self.number_of_clauses, dtype=np.uint32, order="c")
         
         if self.spatio_temporal:
+            self.xi_hypervector = np.empty(self.number_of_patches * self.number_of_ta_chunks, dtype=np.uint32, order="c")
+
             self.clause_value_in_patch = np.empty(self.number_of_patches * self.number_of_clauses, dtype=np.uint32, order="c")
             self.clause_value_in_patch_tmp = np.empty(self.number_of_patches * self.number_of_clauses, dtype=np.uint32, order="c")
 
@@ -79,6 +81,12 @@ def __init__(
 
             self.attention = np.empty(self.number_of_ta_chunks, dtype=np.uint32, order="c")
 
+            self.hypervectors = np.empty((self.number_of_clauses, self.hypervector_bits), dtype=np.uint32, order="c")
+            indexes = np.arange(self.hypervector_size, dtype=np.uint32)
+            for i in range(self.number_of_clauses):
+                self.hypervectors[i,:] = np.random.choice(indexes, size=(self.hypervector_bits), replace=False)
+            self.hypervectors = self.hypervectors.reshape(self.number_of_clauses*self.hypervector_bits)
+
         # Incremental Clause Evaluation
         self.literal_clause_map = np.empty(
             (int(self.number_of_literals * self.number_of_clauses)),
@@ -122,7 +130,8 @@ def _cffi_init(self):
         self.ptr_output_one_patches = ffi.cast("unsigned int *", self.output_one_patches.ctypes.data)
         self.ptr_literal_clause_count = ffi.cast("unsigned int *", self.literal_clause_count.ctypes.data)
         self.tiafc_p = ffi.cast("unsigned int *", self.type_ia_feedback_counter.ctypes.data)
-        
+        self.xih_p = ffi.cast("unsigned int *", self.xi_hypervector.ctypes.data)
+
         if self.spatio_temporal:
             self.cvip_p = ffi.cast("unsigned int *", self.clause_value_in_patch.ctypes.data)
             self.cvipt_p = ffi.cast("unsigned int *", self.clause_value_in_patch_tmp.ctypes.data)
@@ -135,6 +144,7 @@ def _cffi_init(self):
             self.ctvtl_p = ffi.cast("unsigned int *", self.clause_truth_value_transitions_length.ctypes.data)
 
             self.a_p = ffi.cast("unsigned int *", self.attention.ctypes.data)
+            self.hv_p = ffi.cast("unsigned int *", self.hypervectors.ctypes.data)
 
         # Clause Initialization
         self.ptr_ta_state = ffi.cast("unsigned int *", self.clause_bank.ctypes.data)
@@ -177,13 +187,24 @@ def calculate_clause_outputs_predict(self, encoded_X, e):
 
         if not self.incremental or self.spatio_temporal:
             if self.spatio_temporal:
+                lib.cb_prepare_hypervector(
+                    self.number_of_input_features,
+                    self.number_of_patches,
+                    self.hypervector_size,
+                    self.depth,
+                    xi_p,
+                    self.xih_p
+                )
+
                 lib.cb_calculate_spatio_temporal_features(
                     self.ptr_ta_state,
                     self.number_of_clauses,
-                    self.number_of_literals,
+                    self.number_of_features,
                     self.number_of_state_bits_ta,
                     self.number_of_patches,
                     self.depth,
+                    self.hypervector_size,
+                    self.hypervector_bits,
                     self.cvip_p,
                     self.cvipt_p,
                     self.ctc_p,
@@ -192,7 +213,8 @@ def calculate_clause_outputs_predict(self, encoded_X, e):
                     self.ctvt_p,
                     self.ctvtl_p,
                     self.a_p,
-                    xi_p
+                    self.hv_p,
+                    self.xih_p
                 )
 
                 lib.cb_calculate_clause_outputs_predict_spatio_temporal(
@@ -208,7 +230,7 @@ def calculate_clause_outputs_predict(self, encoded_X, e):
                     self.cfcb_p,
                     self.ctvt_p,
                     self.ctvtl_p,
-                    xi_p
+                    self.xih_p
                 )
             else:
                 lib.cb_calculate_clause_outputs_predict(
@@ -218,7 +240,7 @@ def calculate_clause_outputs_predict(self, encoded_X, e):
                     self.number_of_state_bits_ta,
                     self.number_of_patches,
                     self.co_p,
-                    xi_p
+                    self.xih_p
                 )
             return self.clause_output
 
@@ -258,13 +280,24 @@ def calculate_clause_outputs_update(self, literal_active, encoded_X, e):
         la_p = ffi.cast("unsigned int *", literal_active.ctypes.data)
 
         if self.spatio_temporal:
+            lib.cb_prepare_hypervector(
+                self.number_of_input_features,
+                self.number_of_patches,
+                self.hypervector_size,
+                self.depth,
+                xi_p,
+                self.xih_p
+            )
+
             lib.cb_calculate_spatio_temporal_features(
                 self.ptr_ta_state,
                 self.number_of_clauses,
-                self.number_of_literals,
+                self.number_of_features,
                 self.number_of_state_bits_ta,
                 self.number_of_patches,
                 self.depth,
+                self.hypervector_size,
+                self.hypervector_bits,
                 self.cvip_p,
                 self.cvipt_p,
                 self.ctc_p,
@@ -273,7 +306,8 @@ def calculate_clause_outputs_update(self, literal_active, encoded_X, e):
                 self.ctvt_p,
                 self.ctvtl_p,
                 self.a_p,
-                xi_p
+                self.hv_p,
+                self.xih_p
             )
 
             lib.cb_calculate_clause_outputs_update_spatio_temporal(
@@ -290,7 +324,7 @@ def calculate_clause_outputs_update(self, literal_active, encoded_X, e):
                 self.cfcb_p,
                 self.ctvt_p,
                 self.ctvtl_p,
-                xi_p
+                self.xih_p
             )
         else:
             lib.cb_calculate_clause_outputs_update(
@@ -310,13 +344,24 @@ def calculate_clause_outputs_patchwise(self, encoded_X, e):
         xi_p = ffi.cast("unsigned int *", encoded_X[e, :].ctypes.data)
 
         if self.spatio_temporal:
+            lib.cb_prepare_hypervector(
+                self.number_of_input_features,
+                self.number_of_patches,
+                self.hypervector_size,
+                self.depth,
+                xi_p,
+                self.xih_p
+            )
+
             lib.cb_calculate_spatio_temporal_features(
                 self.ptr_ta_state,
                 self.number_of_clauses,
-                self.number_of_literals,
+                self.number_of_features,
                 self.number_of_state_bits_ta,
                 self.number_of_patches,
                 self.depth,
+                self.hypervector_size,
+                self.hypervector_bits,
                 self.cvip_p,
                 self.cvipt_p,
                 self.ctc_p,
@@ -325,7 +370,8 @@ def calculate_clause_outputs_patchwise(self, encoded_X, e):
                 self.ctvt_p,
                 self.ctvtl_p,
                 self.a_p,
-                xi_p
+                self.hv_p,
+                self.xih_p
             )
 
         lib.cb_calculate_clause_outputs_patchwise(
@@ -374,7 +420,7 @@ def type_i_feedback(
                 self.cfcb_p,
                 self.ctvt_p,
                 self.ctvtl_p,
-                ptr_xi
+                self.xih_p
             )
         else:
             lib.cb_type_i_feedback(
@@ -426,7 +472,7 @@ def type_ii_feedback(
                 self.cfcb_p,
                 self.ctvt_p,
                 self.ctvtl_p,
-                ptr_xi
+                self.xih_p
             )
         else:
             lib.cb_type_ii_feedback(
@@ -543,19 +589,14 @@ def prepare_X(
             self,
             X
     ):
-        if self.spatio_temporal:
-            spatio_temporal_features = self.number_of_clauses*4*self.depth
-        else:
-            spatio_temporal_features = 0
-
         return tmu.tools.encode(
             X,
             X.shape[0],
             self.number_of_patches,
-            self.number_of_ta_chunks,
+            self.number_of_input_ta_chunks,
             self.dim,
             self.patch_dim,
-            spatio_temporal_features
+            0
         )
 
     def prepare_X_autoencoder(
@@ -564,7 +605,7 @@ def prepare_X_autoencoder(
             X_csc,
             active_output
     ):
-        X = np.ascontiguousarray(np.empty(int(self.number_of_ta_chunks), dtype=np.uint32))
+        X = np.ascontiguousarray(np.empty(int(self.number_of_input_ta_chunks), dtype=np.uint32))
         return X_csr, X_csc, active_output, X
 
     def produce_autoencoder_example(
diff --git a/tmu/lib/include/ClauseBank.h b/tmu/lib/include/ClauseBank.h
index 5137ec9a..34200bfd 100644
--- a/tmu/lib/include/ClauseBank.h
+++ b/tmu/lib/include/ClauseBank.h
@@ -117,6 +117,15 @@ void cb_type_iii_feedback(
     unsigned int target
 );
 
+void cb_prepare_hypervector(
+    int number_of_input_features,   /* features per patch that are encoded in Xi */
+    int number_of_patches,          /* patches stored in both Xi and Xi_hypervector */
+    int hypervector_size,           /* hypervector_size*depth feature slots precede the input features */
+    int depth,                      /* multiplier for the hypervector region, see hypervector_size */
+    unsigned int *Xi,               /* in: packed input literals, 32 per word */
+    unsigned int *Xi_hypervector    /* out: packed literals incl. the hypervector region */
+);
+
 void cb_calculate_spatio_temporal_features(
         unsigned int *ta_state,
         int number_of_clauses,
@@ -124,6 +133,8 @@ void cb_calculate_spatio_temporal_features(
         int number_of_state_bits,
         int number_of_patches,
         int depth,
+        int hypervector_size,
+        int hypervector_bits,
         unsigned int *clause_value_in_patch,
         unsigned int *clause_new_value_in_patch,
         unsigned int *clause_true_consecutive,
@@ -132,6 +143,7 @@ void cb_calculate_spatio_temporal_features(
         unsigned int *clause_truth_value_transitions,
         unsigned int *clause_truth_value_transitions_length,
         unsigned int *attention,
+        unsigned int *hypervectors,
         unsigned int *Xi
 );
 
diff --git a/tmu/lib/src/ClauseBank.c b/tmu/lib/src/ClauseBank.c
index 86ddd2c5..14e7a7cf 100644
--- a/tmu/lib/src/ClauseBank.c
+++ b/tmu/lib/src/ClauseBank.c
@@ -1453,13 +1453,68 @@ void cb_identify_temporal_truth_value_transitions(
 	}
 }
 
+// Build the per-patch clause input Xi_hypervector from the packed input Xi.
+// Each patch gets hypervector_size*depth hypervector features (reset to False)
+// followed by the number_of_input_features input features copied from Xi.
+// A feature's negated literal is stored number_of_features bits after it.
+// Xi holds number_of_input_ta_chunks words per patch, the output number_of_ta_chunks.
+void cb_prepare_hypervector(
+	int number_of_input_features,
+	int number_of_patches,
+	int hypervector_size,
+	int depth,
+	unsigned int *Xi,
+	unsigned int *Xi_hypervector
+)
+{
+	int number_of_hypervector_features = hypervector_size*depth;
+	int number_of_features = number_of_input_features + number_of_hypervector_features;
+	int number_of_literals = number_of_features * 2;
+	int number_of_input_literals = number_of_input_features * 2;
+
+	unsigned int number_of_ta_chunks = (number_of_literals-1)/32 + 1;
+	unsigned int number_of_input_ta_chunks = (number_of_input_literals-1)/32 + 1;
+
+	for (int patch = 0; patch < number_of_patches; ++patch) {
+		unsigned int *patch_in = &Xi[patch*number_of_input_ta_chunks];
+		unsigned int *patch_out = &Xi_hypervector[patch*number_of_ta_chunks];
+
+		// Copy each input feature behind the hypervector region and store its
+		// complement as the negated literal.
+		for (int k = 0; k < number_of_input_features; ++k) {
+			int literal = number_of_hypervector_features + k;
+			int negated_literal = literal + number_of_features;
+
+			// 1U keeps the shift well defined for bit 31 (1 << 31 overflows int).
+			if (patch_in[k / 32] & (1U << (k % 32))) {
+				patch_out[literal / 32] |= (1U << (literal % 32));
+				patch_out[negated_literal / 32] &= ~(1U << (negated_literal % 32));
+			} else {
+				patch_out[literal / 32] &= ~(1U << (literal % 32));
+				patch_out[negated_literal / 32] |= (1U << (negated_literal % 32));
+			}
+		}
+
+		// Reset the hypervector features to False (negated literals to True);
+		// cb_calculate_spatio_temporal_features sets them afterwards.
+		for (int k = 0; k < number_of_hypervector_features; ++k) {
+			int negated_literal = k + number_of_features;
+
+			patch_out[k / 32] &= ~(1U << (k % 32));
+			patch_out[negated_literal / 32] |= (1U << (negated_literal % 32));
+		}
+	}
+}
+
 void cb_calculate_spatio_temporal_features(
         unsigned int *ta_state,
         int number_of_clauses,
-        int number_of_literals,
+        int number_of_features,
         int number_of_state_bits,
         int number_of_patches,
         int depth,
+        int hypervector_size,
+        int hypervector_bits,
         unsigned int *clause_value_in_patch,
         unsigned int *clause_new_value_in_patch,
         unsigned int *clause_true_consecutive,
@@ -1468,13 +1523,16 @@ void cb_calculate_spatio_temporal_features(
         unsigned int *clause_truth_value_transitions,
         unsigned int *clause_truth_value_transitions_length,
         unsigned int *attention,
+        unsigned int *hypervectors,
         unsigned int *Xi
 )
 {
+	int dim_x = 28;
+
 	unsigned int chunk_nr;
 	unsigned int chunk_pos;
 
-	int number_of_spatio_temporal_features = number_of_clauses*4*depth;
+	int number_of_literals = number_of_features * 2;
 
 	unsigned int filter;
 	if (((number_of_literals) % 32) != 0) {
@@ -1486,40 +1544,25 @@ void cb_calculate_spatio_temporal_features(
 	unsigned int number_of_ta_chunks = (number_of_literals-1)/32 + 1;
 
 	memset(attention, 0, number_of_ta_chunks * sizeof(unsigned int));
-	for (int k = number_of_spatio_temporal_features; k < number_of_literals/2; ++k) {
+	for (int k = hypervector_size*depth; k < number_of_features; ++k) {
 		int chunk_nr = k / 32;
 		int chunk_pos = k % 32;
 
 		attention[chunk_nr] |= (1U << chunk_pos);
 
-		chunk_nr = (k + number_of_literals / 2) / 32;
-		chunk_pos = (k + number_of_literals / 2) % 32;
+		chunk_nr = (k + number_of_features) / 32;
+		chunk_pos = (k + number_of_features) % 32;
 
 		attention[chunk_nr] |= (1U << chunk_pos);
 	}
 
-	// Set all spatio-temporal features to false
-	
-	// Initialize all spatio-temporal features to False
-
-	for (int k = 0; k < number_of_spatio_temporal_features; ++k) {
-		for (int patch = 0; patch < number_of_patches; ++patch) {
-			chunk_nr = k / 32;
-			chunk_pos = k % 32;
-			Xi[patch*number_of_ta_chunks + chunk_nr] &= ~(1U << chunk_pos);
-
-			chunk_nr = (k + number_of_literals/2) / 32;
-			chunk_pos = (k + number_of_literals/2) % 32;
-			Xi[patch*number_of_ta_chunks + chunk_nr] |= (1U << chunk_pos);
-		}
-	}
-
 	for (int d = 0; d < depth; ++d) {
 		for (int j = 0; j < number_of_clauses; j++) {
 			unsigned int clause_pos = j*number_of_ta_chunks*number_of_state_bits; // Calculates the position of the Tsetlin automata states of the current clause
 
+			// Evaluate clause in each patch
 			for (int patch = 0; patch < number_of_patches; ++patch) {
-				unsigned int clause_output = cb_calculate_clause_output_with_literal_active(
+				clause_value_in_patch[patch] = cb_calculate_clause_output_with_literal_active(
 					&ta_state[clause_pos],
 					number_of_ta_chunks,
 					number_of_state_bits,
@@ -1527,71 +1570,247 @@ void cb_calculate_spatio_temporal_features(
 					attention,
 					&Xi[patch*number_of_ta_chunks]
 				);
-				
-				// Just after
-				if (patch > 0 && !(d % 2)) {
-					if (clause_output) {
-						chunk_nr = (number_of_clauses*4*d + j) / 32;
-						chunk_pos = (number_of_clauses*4*d + j) % 32;
-						Xi[(patch-1)*number_of_ta_chunks + chunk_nr] |= (1U << chunk_pos);
-
-						chunk_nr = (number_of_clauses*4*d + j + number_of_literals/2) / 32;
-						chunk_pos = (number_of_clauses*4*d + j + number_of_literals/2) % 32;
-						Xi[(patch-1)*number_of_ta_chunks + chunk_nr] &= ~(1U << chunk_pos);
-					}
-				}
-
-				// Just before
-				if (patch < number_of_patches - 1 && !(d % 2)) {
-					if (clause_output) {
-						chunk_nr = (number_of_clauses*4*d + j + number_of_clauses) / 32;
-						chunk_pos = (number_of_clauses*4*d + j + number_of_clauses) % 32;
-						Xi[(patch+1)*number_of_ta_chunks + chunk_nr] |= (1U << chunk_pos);
-
-						chunk_nr = (number_of_clauses*4*d + j + number_of_clauses + number_of_literals/2) / 32;
-						chunk_pos = (number_of_clauses*4*d + j + number_of_clauses + number_of_literals/2) % 32;
-						Xi[(patch+1)*number_of_ta_chunks + chunk_nr] &= ~(1U << chunk_pos);
-					}
-				}
-
-				if (clause_output && (d % 2)) {
-					// After
-
-					for (int patch_before = 0; patch_before < patch; ++patch_before) {
-						chunk_nr = (number_of_clauses*4*d + j + number_of_clauses*2) / 32;
-						chunk_pos = (number_of_clauses*4*d + j + number_of_clauses*2) % 32;
-						Xi[patch_before*number_of_ta_chunks + chunk_nr] |= (1U << chunk_pos);
-
-						chunk_nr = (number_of_clauses*4*d + j + number_of_clauses*2 + number_of_literals/2) / 32;
-						chunk_pos = (number_of_clauses*4*d + j + number_of_clauses*2 + number_of_literals/2) % 32;
-						Xi[patch_before*number_of_ta_chunks + chunk_nr] &= ~(1U << chunk_pos);		
-					}
-
-					// Before
-					for (int patch_after = patch + 1; patch_after < number_of_patches; ++patch_after) {
-						chunk_nr = (number_of_clauses*4*d + j + number_of_clauses*3) / 32;
-						chunk_pos = (number_of_clauses*4*d + j + number_of_clauses*3) % 32;
-						Xi[patch_after*number_of_ta_chunks + chunk_nr] |= (1U << chunk_pos);
+			}
 
-						chunk_nr = (number_of_clauses*4*d + j + number_of_clauses*3 + number_of_literals/2) / 32;
-						chunk_pos = (number_of_clauses*4*d + j + number_of_clauses*3 + number_of_literals/2) % 32;
-						Xi[patch_after*number_of_ta_chunks + chunk_nr] &= ~(1U << chunk_pos);
+			// for (int patch = 0; patch < number_of_patches; ++patch) {
+			// 	if (clause_value_in_patch[patch] && (d < depth)) {
+			// 		// Right
+			// 		if (patch > 0) {
+			// 			for (int k = 0; k < hypervector_bits; ++k) {
+			// 				int feature_index = hypervector_size*d + (hypervectors[j*hypervector_bits + k]);
+						
+			// 				chunk_nr = feature_index / 32;
+			// 				chunk_pos = feature_index % 32;
+			// 				Xi[(patch-1)*number_of_ta_chunks + chunk_nr] |= (1U << chunk_pos);
+
+			// 				chunk_nr = (feature_index + number_of_features) / 32;
+			// 				chunk_pos = (feature_index + number_of_features) % 32;
+			// 				Xi[(patch-1)*number_of_ta_chunks + chunk_nr] &= ~(1U << chunk_pos);
+			// 			}
+			// 		}
+
+			// 		// Left
+			// 		if (patch < number_of_patches - 1) {
+			// 			for (int k = 0; k < hypervector_bits; ++k) {
+			// 				int feature_index = hypervector_size*d + ((hypervectors[j*hypervector_bits + k] + 1) % hypervector_size);
+						
+			// 				chunk_nr = feature_index / 32;
+			// 				chunk_pos = feature_index % 32;
+			// 				Xi[(patch+1)*number_of_ta_chunks + chunk_nr] |= (1U << chunk_pos);
+
+			// 				chunk_nr = (feature_index + number_of_features) / 32;
+			// 				chunk_pos = (feature_index + number_of_features) % 32;
+			// 				Xi[(patch+1)*number_of_ta_chunks + chunk_nr] &= ~(1U << chunk_pos);
+			// 			}
+			// 		}
+
+			// 		// Below
+			// 		if (patch > dim_x) {
+			// 			for (int k = 0; k < hypervector_bits; ++k) {
+			// 				int feature_index = hypervector_size*d + ((hypervectors[j*hypervector_bits + k] + 2) % hypervector_size);
+						
+			// 				chunk_nr = feature_index / 32;
+			// 				chunk_pos = feature_index % 32;
+			// 				Xi[(patch-dim_x)*number_of_ta_chunks + chunk_nr] |= (1U << chunk_pos);
+
+			// 				chunk_nr = (feature_index + number_of_features) / 32;
+			// 				chunk_pos = (feature_index + number_of_features) % 32;
+			// 				Xi[(patch-dim_x)*number_of_ta_chunks + chunk_nr] &= ~(1U << chunk_pos);
+			// 			}
+			// 		}
+
+			// 		// Above
+			// 		if (patch < number_of_patches - 1 - dim_x) {
+			// 			for (int k = 0; k < hypervector_bits; ++k) {
+			// 				int feature_index = hypervector_size*d + ((hypervectors[j*hypervector_bits + k] + 3) % hypervector_size);
+						
+			// 				chunk_nr = feature_index / 32;
+			// 				chunk_pos = feature_index % 32;
+			// 				Xi[(patch+dim_x)*number_of_ta_chunks + chunk_nr] |= (1U << chunk_pos);
+
+			// 				chunk_nr = (feature_index + number_of_features) / 32;
+			// 				chunk_pos = (feature_index + number_of_features) % 32;
+			// 				Xi[(patch+dim_x)*number_of_ta_chunks + chunk_nr] &= ~(1U << chunk_pos);
+			// 			}
+			// 		}
+			// 	}
+			// }
+
+
+			if (1 || d == depth-1) {
+				for (int patch = 0; patch < number_of_patches; ++patch) {
+					if (clause_value_in_patch[patch]) {
+						int column = patch % dim_x;
+						int row = patch / dim_x;
+
+						for (int r = 1; r <= row; ++r) {
+							for (int k = 0; k < hypervector_bits; ++k) {
+								int feature_index = hypervector_size*d + ((hypervectors[j*hypervector_bits + k] + r) % hypervector_size);
+							
+								chunk_nr = feature_index / 32;
+								chunk_pos = feature_index % 32;
+								Xi[(patch - r*dim_x)*number_of_ta_chunks + chunk_nr] |= (1U << chunk_pos);
+
+								chunk_nr = (feature_index + number_of_features) / 32;
+								chunk_pos = (feature_index + number_of_features) % 32;
+								Xi[(patch - r*dim_x)*number_of_ta_chunks + chunk_nr] &= ~(1U << chunk_pos);
+							}
+						}
+
+						for (int r = 1; r < (number_of_patches/dim_x - row); ++r) {
+							for (int k = 0; k < hypervector_bits; ++k) {
+								int feature_index = hypervector_size*d + ((hypervectors[j*hypervector_bits + k] + r*number_of_patches) % hypervector_size);
+							
+								chunk_nr = feature_index / 32;
+								chunk_pos = feature_index % 32;
+								Xi[(patch + r*dim_x)*number_of_ta_chunks + chunk_nr] |= (1U << chunk_pos);
+
+								chunk_nr = (feature_index + number_of_features) / 32;
+								chunk_pos = (feature_index + number_of_features) % 32;
+								Xi[(patch + r*dim_x)*number_of_ta_chunks + chunk_nr] &= ~(1U << chunk_pos);
+							}
+						}
+
+						for (int c = 1; c <= column; ++c) {
+							for (int k = 0; k < hypervector_bits; ++k) {
+								int feature_index = hypervector_size*d + ((hypervectors[j*hypervector_bits + k] + c*number_of_patches*number_of_patches) % hypervector_size);
+							
+								chunk_nr = feature_index / 32;
+								chunk_pos = feature_index % 32;
+								Xi[(patch - c)*number_of_ta_chunks + chunk_nr] |= (1U << chunk_pos);
+
+								chunk_nr = (feature_index + number_of_features) / 32;
+								chunk_pos = (feature_index + number_of_features) % 32;
+								Xi[(patch - c)*number_of_ta_chunks + chunk_nr] &= ~(1U << chunk_pos);
+							}
+						}
+
+						for (int c = 1; c < dim_x - column; ++c) {
+							for (int k = 0; k < hypervector_bits; ++k) {
+								int feature_index = hypervector_size*d + ((hypervectors[j*hypervector_bits + k] + c*number_of_patches*number_of_patches*number_of_patches) % hypervector_size);
+							
+								chunk_nr = feature_index / 32;
+								chunk_pos = feature_index % 32;
+								Xi[(patch + c)*number_of_ta_chunks + chunk_nr] |= (1U << chunk_pos);
+
+								chunk_nr = (feature_index + number_of_features) / 32;
+								chunk_pos = (feature_index + number_of_features) % 32;
+								Xi[(patch + c)*number_of_ta_chunks + chunk_nr] &= ~(1U << chunk_pos);
+							}
+						}
 					}
 				}
+				// Left (disabled: this and the Above/Right/Below passes below are commented out)
+
+				// int clause_first_true_in_column = number_of_patches / dim_x;
+				// for (int patch = 0; patch < number_of_patches; ++patch) {
+				// 	if (clause_value_in_patch[patch] && (patch % dim_x) < clause_first_true_in_column) {
+				// 		clause_first_true_in_column = patch % dim_x;
+				// 	}
+				// }
+
+				// for (int patch = 0; patch < number_of_patches; ++patch) {
+				// 	if ((patch % dim_x) > clause_first_true_in_column) {
+				// 		for (int k = 0; k < hypervector_bits; ++k) {
+				// 			int feature_index = hypervector_size*d + ((hypervectors[j*hypervector_bits + k] + (patch % dim_x) - clause_first_true_in_column) % hypervector_size);
+						
+				// 			chunk_nr = feature_index / 32;
+				// 			chunk_pos = feature_index % 32;
+				// 			Xi[patch*number_of_ta_chunks + chunk_nr] |= (1U << chunk_pos);
+
+				// 			chunk_nr = (feature_index + number_of_features) / 32;
+				// 			chunk_pos = (feature_index + number_of_features) % 32;
+				// 			Xi[patch*number_of_ta_chunks + chunk_nr] &= ~(1U << chunk_pos);
+				// 		}
+				// 	}
+				// }
+
+				// // Above
+
+				// int clause_first_true_in_row = number_of_patches / dim_x;
+				// for (int patch = 0; patch < number_of_patches; ++patch) {
+				// 	if (clause_value_in_patch[patch] && (patch / dim_x) < clause_first_true_in_row) {
+				// 		clause_first_true_in_row = patch / dim_x;
+				// 	}
+				// }
+
+				// for (int patch = 0; patch < number_of_patches; ++patch) {
+				// 	if ((patch / dim_x) > clause_first_true_in_row) {
+				// 		for (int k = 0; k < hypervector_bits; ++k) {
+				// 			int feature_index = hypervector_size*d + ((hypervectors[j*hypervector_bits + k] + ((patch / dim_x) - clause_first_true_in_row)*number_of_patches) % hypervector_size);
+						
+				// 			chunk_nr = feature_index / 32;
+				// 			chunk_pos = feature_index % 32;
+				// 			Xi[patch*number_of_ta_chunks + chunk_nr] |= (1U << chunk_pos);
+
+				// 			chunk_nr = (feature_index + number_of_features) / 32;
+				// 			chunk_pos = (feature_index + number_of_features) % 32;
+				// 			Xi[patch*number_of_ta_chunks + chunk_nr] &= ~(1U << chunk_pos);
+				// 		}
+				// 	}
+				// }
+
+				// // Right
+
+				// int clause_last_true_in_column = -1;
+				// for (int patch = 0; patch < number_of_patches; ++patch) {
+				// 	if (clause_value_in_patch[patch] && (patch % dim_x) > clause_last_true_in_column) {
+				// 		clause_last_true_in_column = patch % dim_x;
+				// 	}
+				// }
+
+				// for (int patch = 0; patch < number_of_patches; ++patch) {
+				// 	if ((patch % dim_x) < clause_last_true_in_column) {
+				// 		for (int k = 0; k < hypervector_bits; ++k) {
+				// 			int feature_index = hypervector_size*d + ((hypervectors[j*hypervector_bits + k] + (clause_last_true_in_column - (patch % dim_x))*number_of_patches*number_of_patches) % hypervector_size);
+						
+				// 			chunk_nr = feature_index / 32;
+				// 			chunk_pos = feature_index % 32;
+				// 			Xi[patch*number_of_ta_chunks + chunk_nr] |= (1U << chunk_pos);
+
+				// 			chunk_nr = (feature_index + number_of_features) / 32;
+				// 			chunk_pos = (feature_index + number_of_features) % 32;
+				// 			Xi[patch*number_of_ta_chunks + chunk_nr] &= ~(1U << chunk_pos);
+				// 		}
+				// 	}
+				// }
+
+				// // Below
+
+				// int clause_last_true_in_row = -1;
+				// for (int patch = 0; patch < number_of_patches; ++patch) {
+				// 	if (clause_value_in_patch[patch] && (patch / dim_x) > clause_last_true_in_row) {
+				// 		clause_last_true_in_row = patch / dim_x;
+				// 	}
+				// }
+
+				// for (int patch = 0; patch < number_of_patches; ++patch) {
+				// 	if ((patch / dim_x) < clause_last_true_in_row) {
+				// 		for (int k = 0; k < hypervector_bits; ++k) {
+				// 			int feature_index = hypervector_size*d + ((hypervectors[j*hypervector_bits + k] + (clause_last_true_in_row - (patch / dim_x))*number_of_patches*number_of_patches*number_of_patches) % hypervector_size);
+						
+				// 			chunk_nr = feature_index / 32;
+				// 			chunk_pos = feature_index % 32;
+				// 			Xi[patch*number_of_ta_chunks + chunk_nr] |= (1U << chunk_pos);
+
+				// 			chunk_nr = (feature_index + number_of_features) / 32;
+				// 			chunk_pos = (feature_index + number_of_features) % 32;
+				// 			Xi[patch*number_of_ta_chunks + chunk_nr] &= ~(1U << chunk_pos);
+				// 		}
+				// 	}
+				// }
 			}
-
 		}
 	
-
-		//Increase attention to next layer
-		for (int j = 0; j < number_of_clauses*4; ++j) {
-			int chunk_nr = (number_of_clauses*4*d + j) / 32;
-			int chunk_pos = (number_of_clauses*4*d + j) % 32;
+		// Increase attention to next layer
+		for (int j = 0; j < hypervector_size; ++j) {
+			int chunk_nr = (hypervector_size*d + j) / 32;
+			int chunk_pos = (hypervector_size*d + j) % 32;
 
 			attention[chunk_nr] |= (1U << chunk_pos);
 
-			chunk_nr = (number_of_clauses*4*d + j + number_of_literals / 2) / 32;
-			chunk_pos = (number_of_clauses*4*d + j +  number_of_literals / 2) % 32;
+			chunk_nr = (hypervector_size*d + j + number_of_features) / 32;
+			chunk_pos = (hypervector_size*d + j +  number_of_features) % 32;
 
 			attention[chunk_nr] |= (1U << chunk_pos);
 		}
diff --git a/tmu/lib/src/Tools.c b/tmu/lib/src/Tools.c
index 790288ca..12f2df0e 100644
--- a/tmu/lib/src/Tools.c
+++ b/tmu/lib/src/Tools.c
@@ -135,14 +135,14 @@ void tmu_encode(
 )
 {
 	int global_number_of_features = dim_x * dim_y * dim_z;
-	int number_of_features = spatio_temporal_features + patch_dim_x * patch_dim_y * dim_z + (dim_x - patch_dim_x) + (dim_y - patch_dim_y);
+	int number_of_features = patch_dim_x * patch_dim_y * dim_z;
 	int number_of_patches = (dim_x - patch_dim_x + 1) * (dim_y - patch_dim_y + 1);
 
 	int number_of_literal_chunks;
 	if (append_negated) {
-		number_of_literal_chunks= (((2*number_of_features-1)/32 + 1));
+		number_of_literal_chunks = (((2*number_of_features-1)/32 + 1));
 	} else {
-		number_of_literal_chunks= (((number_of_features-1)/32 + 1));
+		number_of_literal_chunks = (((number_of_features-1)/32 + 1));
 	}
 
 	unsigned int *Xi;
@@ -155,10 +155,8 @@ void tmu_encode(
 
 	memset(encoded_X, 0, number_of_examples * number_of_patches * number_of_literal_chunks * sizeof(unsigned int));
 
-	unsigned int encoded_pos = 0;
+	unsigned long long int encoded_pos = 0;
 	for (int i = 0; i < number_of_examples; ++i) {
-		//printf("%d\n", i);
-
 		int patch_nr = 0;
 		// Produce the patches of the current image
 		for (int y = 0; y < dim_y - patch_dim_y + 1; ++y) {
@@ -166,51 +164,12 @@ void tmu_encode(
 				Xi = &X[input_pos];
 				encoded_Xi = &encoded_X[encoded_pos];
 
-				// Encode spatio temporal features into feature vector 
-				for (int spatio_temporal_feature = 0; spatio_temporal_feature < spatio_temporal_features; ++spatio_temporal_feature) {
-
-					int chunk_nr = (spatio_temporal_feature + number_of_features) / 32;
-					int chunk_pos = (spatio_temporal_feature + number_of_features) % 32;
-					encoded_Xi[chunk_nr] |= (1 << chunk_pos);
-				}
-
-				// // Encode y coordinate of patch into feature vector 
-				// for (int y_threshold = 0; y_threshold < dim_y - patch_dim_y; ++y_threshold) {
-				// 	int patch_pos = spatio_temporal_features + y_threshold;
-
-				// 	if (y > y_threshold) {
-				// 		int chunk_nr = patch_pos / 32;
-				// 		int chunk_pos = patch_pos % 32;
-				// 		encoded_Xi[chunk_nr] |= (1 << chunk_pos);
-				// 	} else if (append_negated) {
-				// 		int chunk_nr = (patch_pos + number_of_features) / 32;
-				// 		int chunk_pos = (patch_pos + number_of_features) % 32;
-				// 		encoded_Xi[chunk_nr] |= (1 << chunk_pos);
-				// 	}
-				// }
-
-				// // Encode x coordinate of patch into feature vector
-				// for (int x_threshold = 0; x_threshold < dim_x - patch_dim_x; ++x_threshold) {
-				// 	int patch_pos = spatio_temporal_features + (dim_y - patch_dim_y) + x_threshold;
-
-				// 	if (x > x_threshold) {
-				// 		int chunk_nr = patch_pos / 32;
-				// 		int chunk_pos = patch_pos % 32;
-
-				// 		encoded_Xi[chunk_nr] |= (1 << chunk_pos);
-				// 	} else if (append_negated) {
-				// 		int chunk_nr = (patch_pos + number_of_features) / 32;
-				// 		int chunk_pos = (patch_pos + number_of_features) % 32;
-				// 		encoded_Xi[chunk_nr] |= (1 << chunk_pos);
-				// 	}
-				// } 
-
 				// Encode patch content into feature vector
 				for (int p_y = 0; p_y < patch_dim_y; ++p_y) {
 					for (int p_x = 0; p_x < patch_dim_x; ++p_x) {
 						for (int z = 0; z < dim_z; ++z) {
 							int image_pos = (y + p_y)*dim_x*dim_z + (x + p_x)*dim_z + z;
-							int patch_pos = spatio_temporal_features + (dim_y - patch_dim_y) + (dim_x - patch_dim_x) + p_y * patch_dim_x * dim_z + p_x * dim_z + z;
+							int patch_pos = p_y * patch_dim_x * dim_z + p_x * dim_z + z;
 
 							if (Xi[image_pos] == 1) {
 								int chunk_nr = patch_pos / 32;
diff --git a/tmu/models/classification/coalesced_classifier.py b/tmu/models/classification/coalesced_classifier.py
index 89494ffe..4bac7169 100644
--- a/tmu/models/classification/coalesced_classifier.py
+++ b/tmu/models/classification/coalesced_classifier.py
@@ -47,6 +47,8 @@ def __init__(
         weighted_clauses=False,
         clause_drop_p=0.0,
         literal_drop_p=0.0,
+        spatio_temporal=False,
+        depth=0,
         seed=None
     ):
         super().__init__(
@@ -68,6 +70,8 @@ def __init__(
             weighted_clauses=weighted_clauses,
             clause_drop_p=clause_drop_p,
             literal_drop_p=literal_drop_p,
+            spatio_temporal=spatio_temporal,
+            depth=depth,
             seed=seed
         )
         SingleClauseBankMixin.__init__(self)