diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4b5964f6..9061a7ec 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Deprecated
 ### Removed
 ### Fixed
+- Fix bug in GCI2 score for ELEmbeddings.
+- Fix bottleneck in ELBE example for PPI.
+- Fix bugs in BoxSquaredEL model.
+
 ### Security
 
 ## [0.3.0]
diff --git a/README.md b/README.md
index 3a3a1994..dc1bbcce 100644
--- a/README.md
+++ b/README.md
@@ -44,7 +44,7 @@ git clone https://github.com/bio-ontology-research-group/mowl.git
 
 cd mowl
 
-conda env create -f envs/environment_3.8.yml
+conda env create -f envs/environment_3_8.yml
 conda activate mowl
 
 ./build_jars.sh
diff --git a/examples/elmodels/plot_1_elembeddings.py b/examples/elmodels/plot_1_elembeddings.py
index 656d264f..6a9d3cc8 100644
--- a/examples/elmodels/plot_1_elembeddings.py
+++ b/examples/elmodels/plot_1_elembeddings.py
@@ -55,7 +55,7 @@
 # ":math:`p_1` interacts with :math:`p_2`" is encoded using GCI 2 as:
 #
 # .. math::
-#    p_1 \sqsubseteq interacts\_with. p_2
+#    p_1 \sqsubseteq \exists interacts\_with. p_2
 #
 # For that, we can use the class :class:`mowl.models.elembeddings.examples.model_ppi.ELEmPPI` mode, which uses the :class:`mowl.datasets.builtin.PPIYeastSlimDataset` dataset.
 
diff --git a/examples/elmodels/plot_2_elboxembeddings.py b/examples/elmodels/plot_2_elboxembeddings.py
index 87b30439..fee0c6e4 100644
--- a/examples/elmodels/plot_2_elboxembeddings.py
+++ b/examples/elmodels/plot_2_elboxembeddings.py
@@ -50,7 +50,7 @@
 # ":math:`p_1` interacts with :math:`p_2`" is encoded using GCI 2 as:
 #
 # .. math::
-#    p_1 \sqsubseteq interacts\_with. p_2
+#    p_1 \sqsubseteq \exists interacts\_with. p_2
 #
 # For that, we can use the class :class:`mowl.models.elembeddings.examples.model_ppi.ELBoxPPI` mode, which uses the :class:`mowl.datasets.builtin.PPIYeastSlimDataset` dataset.
 
diff --git a/mowl/base_models/elmodel.py b/mowl/base_models/elmodel.py
index 7205e117..c2d53db0 100644
--- a/mowl/base_models/elmodel.py
+++ b/mowl/base_models/elmodel.py
@@ -1,8 +1,10 @@
 from mowl.ontology.normalize import ELNormalizer
 from mowl.base_models.model import Model
+from mowl.datasets.el import ELDataset
+from mowl.projection import projector_factory
 import torch as th
 from torch.utils.data import DataLoader, default_collate
-from mowl.datasets.el import ELDataset
+
 from deprecated.sphinx import versionadded
 
 from org.semanticweb.owlapi.model import OWLClassExpression, OWLClass, OWLObjectSomeValuesFrom, OWLObjectIntersectionOf
@@ -48,6 +50,7 @@ def __init__(self, dataset, embed_dim, batch_size, extended=True, model_filepath
         self._validation_datasets = None
         self._testing_datasets = None
 
+        self._loaded_eval = False
 
     def init_module(self):
         raise NotImplementedError
@@ -379,3 +382,43 @@ def from_pretrained(self, model):
         #self._kge_method = kge_method
     
 
+
+
+    def load_pairwise_eval_data(self):
+        """Build, once, the entity lists and projected edge sets used for evaluation."""
+        if self._loaded_eval:
+            return
+
+        relation = self.dataset.get_evaluation_property()
+        # NOTE(review): assumes ``evaluation_classes`` yields a (heads, tails) pair,
+        # while the PPI example models read ``evaluation_classes.as_str`` -- confirm.
+        heads, tails = self.dataset.evaluation_classes
+        self._head_entities = heads.as_str
+        self._tail_entities = tails.as_str
+
+        projector = projector_factory('taxonomy_rels', taxonomy=False, relations=[relation])
+        self._training_set = projector.project(self.dataset.ontology)
+        self._testing_set = projector.project(self.dataset.testing)
+
+        self._loaded_eval = True
+
+    # Lazy accessors: each one runs load_pairwise_eval_data() before returning.
+    @property
+    def training_set(self):
+        self.load_pairwise_eval_data()
+        return self._training_set
+
+    @property
+    def testing_set(self):
+        self.load_pairwise_eval_data()
+        return self._testing_set
+
+    @property
+    def head_entities(self):
+        self.load_pairwise_eval_data()
+        return self._head_entities
+
+    @property
+    def tail_entities(self):
+        self.load_pairwise_eval_data()
+        return self._tail_entities
diff --git a/mowl/evaluation/rank_based.py b/mowl/evaluation/rank_based.py
index 6ec75064..a1a274a9 100644
--- a/mowl/evaluation/rank_based.py
+++ b/mowl/evaluation/rank_based.py
@@ -125,7 +125,7 @@ def load_training_scores(self):
             c, d = self.head_name_indexemb[c], self.tail_name_indexemb[d]
             c, d = self.head_indexemb_indexsc[c], self.tail_indexemb_indexsc[d]
 
-            self.training_scores[c, d] = 1000000
+            self.training_scores[c, d] = 10000
 
         logging.info("Training scores created")
         self._loaded_tr_scores = True
@@ -231,6 +231,7 @@ def activation(x):
             print(f'Hits@100: {top100:.2f} Filtered:   {ftop100:.2f}')
             print(f'MR:       {mean_rank:.2f} Filtered: {fmean_rank:.2f}')
             print(f'AUC:      {rank_auc:.2f} Filtered:   {frank_auc:.2f}')
+            print(f"Tail entities: {num_tail_entities}")
 
         self.metrics = {
             "hits@1": top1,
diff --git a/mowl/models/__init__.py b/mowl/models/__init__.py
index 3dae4a10..bfb43f7d 100644
--- a/mowl/models/__init__.py
+++ b/mowl/models/__init__.py
@@ -6,6 +6,8 @@
 from mowl.models.elboxembeddings.examples.model_ppi import ELBoxPPI
 from mowl.models.elboxembeddings.examples.model_gda import ELBoxGDA
 
+from mowl.models.boxsquaredel.model import BoxSquaredEL
+
 from mowl.models.graph_random_walk.random_walk_w2v_model import RandomWalkPlusW2VModel
 from mowl.models.graph_kge.graph_pykeen_model import GraphPlusPyKEENModel
 from mowl.models.syntactic.w2v_model import SyntacticPlusW2VModel
diff --git a/mowl/models/boxsquaredel/__init__.py b/mowl/models/boxsquaredel/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/mowl/models/boxsquaredel/evaluate.py b/mowl/models/boxsquaredel/evaluate.py
new file mode 100644
index 00000000..c98a3c54
--- /dev/null
+++ b/mowl/models/boxsquaredel/evaluate.py
@@ -0,0 +1,130 @@
+from mowl.evaluation.base import AxiomsRankBasedEvaluator
+from mowl.projection.factory import projector_factory
+from mowl.projection.edge import Edge
+import logging
+import numpy as np
+from scipy.stats import rankdata
+import torch as th
+
+
+class BoxSquaredELPPIEvaluator(AxiomsRankBasedEvaluator):
+    """Rank-based PPI evaluator: ranks the true tail of an edge among all tails."""
+
+    def __init__(
+            self,
+            axioms,
+            eval_method,
+            axioms_to_filter,
+            class_name_indexemb,
+            rel_name_indexemb,
+            device="cpu",
+            verbose=False
+    ):
+        """Keep the name -> embedding-index maps; derived data is built lazily."""
+        super().__init__(axioms, eval_method, axioms_to_filter, device, verbose)
+
+        self.class_name_indexemb = class_name_indexemb
+        self.relation_name_indexemb = rel_name_indexemb
+
+        self._loaded_training_scores = False
+        self._loaded_eval_data = False
+        self._loaded_ht_data = False
+
+    def _load_head_tail_entities(self):
+        """Collect the entities that have an embedding and index them (runs once)."""
+        if self._loaded_ht_data:
+            return
+
+        ents, _ = Edge.getEntitiesAndRelations(self.axioms)
+        ents_filter, _ = Edge.getEntitiesAndRelations(self.axioms_to_filter)
+
+        # Heads and tails come from the same pool, so filter it once and copy.
+        self.head_entities = set()
+        for e in set(ents) | set(ents_filter):
+            if e in self.class_name_indexemb:
+                self.head_entities.add(e)
+            else:
+                logging.info("Entity %s not present in the embeddings dictionary. Ignoring it.", e)
+        self.tail_entities = set(self.head_entities)
+
+        self.head_name_indexemb = {k: self.class_name_indexemb[k] for k in self.head_entities}
+        self.tail_name_indexemb = {k: self.class_name_indexemb[k] for k in self.tail_entities}
+
+        # Embedding index -> row (head) / column (tail) of the score matrix.
+        self.head_indexemb_indexsc = {v: k for k, v in enumerate(self.head_name_indexemb.values())}
+        self.tail_indexemb_indexsc = {v: k for k, v in enumerate(self.tail_name_indexemb.values())}
+
+        self._loaded_ht_data = True
+
+    def _load_training_scores(self):
+        """Build (once) the head x tail matrix of filtering multipliers.
+
+        Entries are 1, except 10000 for pairs in ``axioms_to_filter`` when filtered
+        metrics are enabled; scores are multiplied by it before filtered ranking.
+        """
+        if self._loaded_training_scores:
+            return self.training_scores
+
+        self._load_head_tail_entities()
+        training_scores = np.ones((len(self.head_entities), len(self.tail_entities)),
+                                  dtype=np.int32)
+
+        if self._compute_filtered_metrics:
+            # careful here: c must be in head entities and d must be in tail entities
+            for axiom in self.axioms_to_filter:
+                c, _, d = axiom.astuple()
+                if (c not in self.head_entities) or not (d in self.tail_entities):
+                    continue
+
+                c, d = self.head_name_indexemb[c], self.tail_name_indexemb[d]
+                c, d = self.head_indexemb_indexsc[c], self.tail_indexemb_indexsc[d]
+                training_scores[c, d] = 10000
+
+            logging.info("Training scores created")
+
+        # Cache on the instance so the early return above never depends on the caller.
+        self.training_scores = training_scores
+        self._loaded_training_scores = True
+        return training_scores
+
+    def _init_axioms(self, axioms):
+        """Project ``axioms`` with the ``taxonomy_rels`` projector; ``None`` passes through."""
+        if axioms is None:
+            return None
+
+        projector = projector_factory("taxonomy_rels", relations=["http://interacts_with"])
+        return projector.project(axioms)  # List of Edges
+
+    def compute_axiom_rank(self, axiom):
+        """Rank the true tail of ``axiom`` among all tail entities.
+
+        :returns: ``(rank, filtered_rank, number_of_tails)``, or three ``None``
+            when the head or the tail of ``axiom`` is not a known entity.
+        """
+        self.training_scores = self._load_training_scores()
+
+        c, r, d = axiom.astuple()
+        if not (c in self.head_entities) or not (d in self.tail_entities):
+            return None, None, None
+
+        # Embedding indices
+        c_emb_idx, d_emb_idx = self.head_name_indexemb[c], self.tail_name_indexemb[d]
+
+        # Scores matrix labels (parenthesised so both values are really unpacked)
+        c_sc_idx, d_sc_idx = (self.head_indexemb_indexsc[c_emb_idx],
+                              self.tail_indexemb_indexsc[d_emb_idx])
+
+        r = self.relation_name_indexemb[r]
+        data = th.tensor([
+            [c_emb_idx, r, self.tail_name_indexemb[x]] for x in
+            self.tail_entities]).to(self.device)
+
+        res = self.eval_method(data).squeeze().cpu().detach().numpy()
+
+        index = rankdata(res, method='average')
+        rank = index[d_sc_idx]
+
+        findex = rankdata((res * self.training_scores[c_sc_idx, :]), method='average')
+        frank = findex[d_sc_idx]
+
+        return rank, frank, len(self.tail_entities)
diff --git a/mowl/models/boxsquaredel/model.py b/mowl/models/boxsquaredel/model.py
new file mode 100644
index 00000000..a24a70cb
--- /dev/null
+++ b/mowl/models/boxsquaredel/model.py
@@ -0,0 +1,77 @@
+
+from mowl.nn import BoxSquaredELModule
+from mowl.base_models.elmodel import EmbeddingELModel
+from mowl.models.boxsquaredel.evaluate import BoxSquaredELPPIEvaluator
+import torch as th
+from torch import nn
+
+
+class BoxSquaredEL(EmbeddingELModel):
+    """
+    Implementation based on [peng2020]_.
+    """
+
+    def __init__(self,
+                 dataset,
+                 embed_dim=50,
+                 margin=0.02,
+                 reg_norm=1,
+                 learning_rate=0.001,
+                 epochs=1000,
+                 batch_size=4096 * 8,
+                 delta=2.5,
+                 reg_factor=0.2,
+                 num_negs=4,
+                 model_filepath=None,
+                 device='cpu'
+                 ):
+        """Record the hyper-parameters and build the underlying module."""
+        super().__init__(dataset, embed_dim, batch_size, extended=True, model_filepath=model_filepath)
+
+        self.learning_rate = learning_rate
+        self.epochs = epochs
+        self.device = device
+        self.margin = margin
+        self.reg_norm = reg_norm
+        self.delta = delta
+        self.reg_factor = reg_factor
+        self.num_negs = num_negs
+        self._loaded = False
+        self.extended = False
+        self.init_module()
+
+    def init_module(self):
+        """(Re)create the module on ``self.device``; ``margin`` is passed as ``gamma``."""
+        n_classes = len(self.class_index_dict)
+        n_relations = len(self.object_property_index_dict)
+        module = BoxSquaredELModule(n_classes, n_relations, embed_dim=self.embed_dim,
+                                    gamma=self.margin, delta=self.delta,
+                                    reg_factor=self.reg_factor)
+        self.module = module.to(self.device)
+
+    def train(self):
+        raise NotImplementedError
+
+    def eval_method(self, data):
+        """Score ``data`` with the module's ``gci2_score``."""
+        return self.module.gci2_score(data)
+
+    def get_embeddings(self):
+        """Load the saved weights and return ``(class, relation)`` name -> vector dicts."""
+        self.init_module()
+        print('Load the best model', self.model_filepath)
+        self.load_best_model()
+
+        # NOTE(review): confirm BoxSquaredELModule really exposes class_embed / rel_embed.
+        class_vectors = self.module.class_embed.weight.cpu().detach().numpy()
+        relation_vectors = self.module.rel_embed.weight.cpu().detach().numpy()
+        ent_embeds = dict(zip(self.class_index_dict.keys(), class_vectors))
+        rel_embeds = dict(zip(self.object_property_index_dict.keys(), relation_vectors))
+        return ent_embeds, rel_embeds
+
+    def load_best_model(self):
+        """Rebuild the module, load ``model_filepath`` into it and switch to eval mode."""
+        self.init_module()
+        self.module.load_state_dict(th.load(self.model_filepath))
+        self.module.eval()
+
diff --git a/mowl/models/elboxembeddings/examples/model_ppi.py b/mowl/models/elboxembeddings/examples/model_ppi.py
index b40063fa..c1eee4dc 100644
--- a/mowl/models/elboxembeddings/examples/model_ppi.py
+++ b/mowl/models/elboxembeddings/examples/model_ppi.py
@@ -20,7 +20,7 @@ class ELBoxPPI(ELBoxEmbeddings):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
                                                                                             
-    def train(self):
+    def train(self, validate_every=1000):
         criterion = nn.MSELoss()
         optimizer = th.optim.Adam(self.module.parameters(), lr=self.learning_rate)
         best_loss = float('inf')
@@ -29,6 +29,9 @@ def train(self):
             k: v.data for k, v in self.training_datasets.items()}
         validation_dataset = self.validation_datasets["gci2"][:]
 
+        prots = [self.class_index_dict[p] for p
+                 in self.dataset.evaluation_classes.as_str]
+        
         for epoch in trange(self.epochs):
             self.module.train()
 
@@ -37,16 +40,13 @@ def train(self):
             for gci_name, gci_dataset in training_datasets.items():
                 if len(gci_dataset) == 0:
                     continue
-                rand_index = np.random.choice(len(gci_dataset), size=512)
-                dst = self.module(gci_dataset[rand_index], gci_name)
+                dst = self.module(gci_dataset, gci_name)
+
                 mse_loss = criterion(dst, th.zeros(dst.shape, requires_grad=False).to(self.device))
                 loss += mse_loss
 
                 if gci_name == "gci2":
-                    rand_index = np.random.choice(len(gci_dataset), size=512)
-                    gci_batch = gci_dataset[rand_index]
-                    prots = [self.class_index_dict[p] for p
-                             in self.dataset.evaluation_classes.as_str]
+                    gci_batch = gci_dataset
                     idxs_for_negs = np.random.choice(prots, size=len(gci_batch), replace=True)
                     rand_prot_ids = th.tensor(idxs_for_negs).to(self.device)
                     neg_data = th.cat([gci_batch[:, :2], rand_prot_ids.unsqueeze(1)], dim=1)
@@ -61,20 +61,22 @@ def train(self):
             optimizer.step()
             train_loss += loss.detach().item()
 
-            with th.no_grad():
-                self.module.eval()
-                valid_loss = 0
-                gci2_data = validation_dataset
 
-                dst = self.module(gci2_data, "gci2")
-                loss = criterion(dst, th.zeros(dst.shape, requires_grad=False).to(self.device))
-                valid_loss += loss.detach().item()
+            if (epoch + 1) % validate_every == 0:
+                with th.no_grad():
+                    self.module.eval()
+                    valid_loss = 0
+                    gci2_data = validation_dataset
+
+                    dst = self.module(gci2_data, "gci2")
+                    loss = criterion(dst, th.zeros(dst.shape, requires_grad=False).to(self.device))
+                    valid_loss += loss.detach().item()
 
-            if best_loss > valid_loss:
-                best_loss = valid_loss
-                print("Saving model..")
-                th.save(self.module.state_dict(), self.model_filepath)
-            print(f'Epoch {epoch}: Train loss: {train_loss} Valid loss: {valid_loss}')
+                if valid_loss < best_loss:
+                    best_loss = valid_loss
+                    print("Saving model..")
+                    th.save(self.module.state_dict(), self.model_filepath)
+                print(f'Epoch {epoch+1}: Train loss: {train_loss} Valid loss: {valid_loss}')
 
         return 1
 
diff --git a/mowl/models/elboxembeddings/model.py b/mowl/models/elboxembeddings/model.py
index 57c79b93..73525dd6 100644
--- a/mowl/models/elboxembeddings/model.py
+++ b/mowl/models/elboxembeddings/model.py
@@ -1,16 +1,8 @@
 
 from mowl.nn import ELBoxModule
 from mowl.base_models.elmodel import EmbeddingELModel
-from mowl.projection.factory import projector_factory
-from mowl.projection.edge import Edge
-import math
-import logging
-import numpy as np
-
 from mowl.models.elboxembeddings.evaluate import ELBoxEmbeddingsPPIEvaluator
 
-from tqdm import trange, tqdm
-
 import torch as th
 from torch import nn
 
@@ -40,7 +32,6 @@ def __init__(self,
         self.epochs = epochs
         self.device = device
         self._loaded = False
-        self._loaded_eval = False
         self.extended = False
         self.init_module()
 
@@ -59,24 +50,6 @@ def train(self):
     def eval_method(self, data):
         return self.module.gci2_loss(data)
 
-    def load_eval_data(self):
-
-        if self._loaded_eval:
-            return
-
-        eval_property = self.dataset.get_evaluation_property()
-        eval_classes = self.dataset.evaluation_classes.as_str
-
-        self._head_entities = set(list(eval_classes)[:])
-        self._tail_entities = set(list(eval_classes)[:])
-
-        eval_projector = projector_factory('taxonomy_rels', taxonomy=False,
-                                           relations=[eval_property])
-
-        self._training_set = eval_projector.project(self.dataset.ontology)
-        self._testing_set = eval_projector.project(self.dataset.testing)
-
-        self._loaded_eval = True
 
     def get_embeddings(self):
         self.init_module()
@@ -95,24 +68,4 @@ def load_best_model(self):
         self.module.load_state_dict(th.load(self.model_filepath))
         self.module.eval()
 
-    @property
-    def training_set(self):
-        self.load_eval_data()
-        return self._training_set
-
-#        self.load_eval_data()
-
-    @property
-    def testing_set(self):
-        self.load_eval_data()
-        return self._testing_set
-
-    @property
-    def head_entities(self):
-        self.load_eval_data()
-        return self._head_entities
 
-    @property
-    def tail_entities(self):
-        self.load_eval_data()
-        return self._tail_entities
diff --git a/mowl/models/elembeddings/examples/model_ppi.py b/mowl/models/elembeddings/examples/model_ppi.py
index ec125f35..8fe2c847 100644
--- a/mowl/models/elembeddings/examples/model_ppi.py
+++ b/mowl/models/elembeddings/examples/model_ppi.py
@@ -17,11 +17,14 @@ def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
 
-    def train(self):
+    def train(self, validate_every=1000):
 
         optimizer = th.optim.Adam(self.module.parameters(), lr=self.learning_rate)
         best_loss = float('inf')
 
+        prots = [self.class_index_dict[p] for p
+                 in self.dataset.evaluation_classes.as_str]
+
         for epoch in trange(self.epochs):
             self.module.train()
 
@@ -34,14 +37,14 @@ def train(self):
 
                 loss += th.mean(self.module(gci_dataset[:], gci_name))
                 if gci_name == "gci2":
-                    prots = [self.class_index_dict[p] for p
-                             in self.dataset.evaluation_classes.as_str]
                     idxs_for_negs = np.random.choice(prots, size=len(gci_dataset), replace=True)
                     rand_index = th.tensor(idxs_for_negs).to(self.device)
                     data = gci_dataset[:]
                     neg_data = th.cat([data[:, :2], rand_index.unsqueeze(1)], dim=1)
                     loss += th.mean(self.module(neg_data, gci_name, neg=True))
 
+            loss += self.module.regularization_loss()
+                    
             optimizer.zero_grad()
             loss.backward()
             optimizer.step()
@@ -55,17 +58,16 @@ def train(self):
                 loss = th.mean(self.module(gci2_data, "gci2"))
                 valid_loss += loss.detach().item()
 
-            checkpoint = 100
-            if best_loss > valid_loss:
-                best_loss = valid_loss
-                th.save(self.module.state_dict(), self.model_filepath)
-            if (epoch + 1) % checkpoint == 0:
-                print(f'Epoch {epoch}: Train loss: {train_loss} Valid loss: {valid_loss}')
+            if (epoch + 1) % validate_every == 0:
+                if valid_loss < best_loss:
+                    best_loss = valid_loss
+                    th.save(self.module.state_dict(), self.model_filepath)
+                print(f'Epoch {epoch+1}: Train loss: {train_loss} Valid loss: {valid_loss}')
 
         return 1
 
     def eval_method(self, data):
-        return self.module.gci2_loss(data)
+        return self.module.gci2_score(data)
 
     def evaluate_ppi(self):
         self.init_module()
diff --git a/mowl/models/elembeddings/model.py b/mowl/models/elembeddings/model.py
index d9332ec0..c593d9f8 100644
--- a/mowl/models/elembeddings/model.py
+++ b/mowl/models/elembeddings/model.py
@@ -5,7 +5,6 @@
 from tqdm import trange, tqdm
 import torch as th
 import numpy as np
-from mowl.projection import projector_factory
 
 class ELEmbeddings(EmbeddingELModel):
     """
@@ -37,7 +36,6 @@ def __init__(self,
         self.epochs = epochs
         self.device = device
         self._loaded = False
-        self._loaded_eval = False
         self.extended = False
         self.init_module()
 
@@ -55,26 +53,6 @@ def train(self):
     def eval_method(self, data):
         return self.module.gci2_loss(data)
 
-    def load_eval_data(self):
-
-        if self._loaded_eval:
-            return
-
-        eval_property = self.dataset.get_evaluation_property()
-        eval_classes = self.dataset.evaluation_classes.as_str
-        print(eval_classes)
-
-        self._head_entities = set(list(eval_classes)[:])
-        self._tail_entities = set(list(eval_classes)[:])
-
-        eval_projector = projector_factory('taxonomy_rels', taxonomy=False,
-                                           relations=[eval_property])
-
-        self._training_set = eval_projector.project(self.dataset.ontology)
-        self._testing_set = eval_projector.project(self.dataset.testing)
-
-        self._loaded_eval = True
-
     def get_embeddings(self):
         self.init_module()
 
@@ -94,22 +72,3 @@ def load_best_model(self):
         self.module.load_state_dict(th.load(self.model_filepath))
         self.module.eval()
 
-    @property
-    def training_set(self):
-        self.load_eval_data()
-        return self._training_set
-
-    @property
-    def testing_set(self):
-        self.load_eval_data()
-        return self._testing_set
-
-    @property
-    def head_entities(self):
-        self.load_eval_data()
-        return self._head_entities
-
-    @property
-    def tail_entities(self):
-        self.load_eval_data()
-        return self._tail_entities
diff --git a/mowl/nn/el/boxsquaredel/losses.py b/mowl/nn/el/boxsquaredel/losses.py
index cb720b0e..3a3e68d0 100644
--- a/mowl/nn/el/boxsquaredel/losses.py
+++ b/mowl/nn/el/boxsquaredel/losses.py
@@ -4,7 +4,7 @@
 def box_distance(box_a, box_b):
     center_a, offset_a = box_a
     center_b, offset_b = box_b
-    dist = th.abs(center_a - center_b) - offset_a - offset_b
+    dist = th.abs(center_a - center_b) - offset_a - offset_b 
     return dist
 
 def box_intersection(box_a, box_b):
@@ -18,29 +18,38 @@ def box_intersection(box_a, box_b):
     intersection = (centers, offsets)
     return intersection, lower, upper
 
-def inclusion_loss(box_a, box_b, gamma):
+def inclusion_score(box_a, box_b, gamma):
     dist_a_b = box_distance(box_a, box_b)
     _, offset_a = box_a
-    loss = th.linalg.norm(th.relu(dist_a_b + 2*offset_a - gamma), dim=1)
-    return loss
+    score = th.linalg.norm(th.relu(dist_a_b + 2*offset_a - gamma), dim=1)
+    return score
 
-def gci0_loss(data, class_center, class_offset, gamma, neg=False):
+def gci0_score(data, class_center, class_offset, gamma):
     center_c = class_center(data[:, 0])
     offset_c = th.abs(class_offset(data[:, 0]))
     center_d = class_center(data[:, 1])
     offset_d = th.abs(class_offset(data[:, 1]))
     box_c = (center_c, offset_c)
     box_d = (center_d, offset_d)
-    loss = inclusion_loss(box_c, box_d, gamma)
-    
+    score = inclusion_score(box_c, box_d, gamma)
+    return score
+
+def gci0_loss(data, class_center, class_offset, gamma, neg=False):
+    """Mean of the squared ``gci0_score`` values; ``neg`` is accepted but unused."""
+    per_axiom = gci0_score(data, class_center, class_offset, gamma)
+    return per_axiom.square().mean()
 
-def gci0_bot_loss(data, class_offset):
+def gci0_bot_score(data, class_offset):
     offset_c = th.abs(class_offset(data[:, 0]))
-    loss = th.linalg.norm(offset_c, dim=1)
+    score = th.linalg.norm(offset_c, dim=1)
+    return score
+
+def gci0_bot_loss(data, class_offset):
+    """Mean of the squared ``gci0_bot_score`` values."""
+    per_axiom = gci0_bot_score(data, class_offset)
+    return per_axiom.square().mean()
 
-def gci1_loss(data, class_center, class_offset, gamma, neg=False):
+def gci1_score(data, class_center, class_offset, gamma):
     center_c = class_center(data[:, 0])
     center_d = class_center(data[:, 1])
     center_e = class_center(data[:, 2])
@@ -53,14 +62,18 @@ def gci1_loss(data, class_center, class_offset, gamma, neg=False):
     box_e = (center_e, offset_e)
 
     intersection, lower, upper = box_intersection(box_c, box_d)
-    box_incl_loss = inclusion_loss(intersection, box_e, gamma)
+    box_incl_score = inclusion_score(intersection, box_e, gamma)
 
-    additional_loss = th.linalg.norm(th.relu(lower - upper), dim=1)
-    loss = box_incl_loss + additional_loss
-    return loss
+    additional_score = th.linalg.norm(th.relu(lower - upper), dim=1)
+    score = box_incl_score + additional_score
+    return score
 
+def gci1_loss(data, class_center, class_offset, gamma, neg=False):
+    """Mean of the squared ``gci1_score`` values; ``neg`` is accepted but unused."""
+    per_axiom = gci1_score(data, class_center, class_offset, gamma)
+    return per_axiom.square().mean()
 
-def gci1_bot_loss(data, class_center, class_offset, gamma, neg=False):
+def gci1_bot_score(data, class_center, class_offset, gamma):
 
     center_c = class_center(data[:, 0])
     center_d = class_center(data[:, 1])
@@ -72,47 +85,56 @@ def gci1_bot_loss(data, class_center, class_offset, gamma, neg=False):
     box_d = (center_d, offset_d)
 
     box_dist = box_distance(box_c, box_d)
-    loss = th.linalg.norm(th.relu(-box_dist - gamma), dim=1)
-    return loss
+    score = th.linalg.norm(th.relu(-box_dist - gamma), dim=1)
+    return score
 
+def gci1_bot_loss(data, class_center, class_offset, gamma, neg=False):
+    """Mean of the squared ``gci1_bot_score`` values; ``neg`` is accepted but unused."""
+    per_axiom = gci1_bot_score(data, class_center, class_offset, gamma)
+    return per_axiom.square().mean()
 
-def gci2_loss(data, class_center, class_offset, head_center, head_offset, tail_center, tail_offset, bump, gamma, delta, reg_factor, neg=False):
-    if neg:
-        return gci2_loss_neg(data, class_center, class_offset, head_center, head_offset, tail_center, tail_offset, bump, gamma, delta)
-    else:
+def gci2_score(data, class_center, class_offset, head_center, head_offset, tail_center, tail_offset, bump, gamma, delta):
+    center_c = class_center(data[:, 0])
+    offset_c = th.abs(class_offset(data[:, 0]))
 
-        center_c = class_center(data[:, 0])
-        offset_c = th.abs(class_offset(data[:, 0]))
+    center_head = head_center(data[:, 1])
+    offset_head = th.abs(head_offset(data[:, 1]))
 
-        center_head = head_center(data[:, 1])
-        offset_head = th.abs(head_offset(data[:, 1]))
+    center_tail = tail_center(data[:, 1])
+    offset_tail = th.abs(tail_offset(data[:, 1]))
 
-        center_tail = tail_center(data[:, 1])
-        offset_tail = th.abs(tail_offset(data[:, 1]))
+    center_d = class_center(data[:, 2])
+    offset_d = th.abs(class_offset(data[:, 2]))
 
-        center_d = class_center(data[:, 2])
-        offset_d = th.abs(class_offset(data[:, 2]))
+    bump_c = bump(data[:, 0])
+    bump_d = bump(data[:, 2])
 
-        bump_c = bump(data[:, 0])
-        bump_d = bump(data[:, 2])
-        
-        box_c = (center_c, offset_c)
-        box_head = (center_head, offset_head)
-        box_tail = (center_tail, offset_tail)
-        box_d = (center_d, offset_d)
+    box_c = (center_c, offset_c)
+    box_head = (center_head, offset_head)
+    box_tail = (center_tail, offset_tail)
+    box_d = (center_d, offset_d)
 
-        bumped_c = (center_c + bump_d, offset_c)
-        bumped_d = (center_d + bump_c, offset_d)
+    bumped_c = (center_c + bump_d, offset_c)
+    bumped_d = (center_d + bump_c, offset_d)
 
-        inclussion_1 = inclusion_loss(bumped_c, box_head, gamma)
-        inclussion_2 = inclusion_loss(bumped_d, box_tail, gamma)
+    inclussion_1 = inclusion_score(bumped_c, box_head, gamma)
+    inclussion_2 = inclusion_score(bumped_d, box_tail, gamma)
 
-        loss = inclussion_1 + inclussion_2
-        reg_loss = reg_factor * th.linalg.norm(bump.weight, dim=1).sum()
-        return reg_loss + loss/2
+    score = (inclussion_1 + inclussion_2)/2
+    return score
 
 
-def gci2_loss_neg(data, class_center, class_offset, head_center, head_offset, tail_center, tail_offset, bump, gamma, delta):
+def gci2_loss(data, class_center, class_offset, head_center, head_offset, tail_center, tail_offset, bump, gamma, delta, reg_factor, neg=False):
+    """Mean of the squared ``gci2_score`` values, or ``gci2_loss_neg`` when ``neg`` is true."""
+    if neg:
+        return gci2_loss_neg(data, class_center, class_offset, head_center, head_offset, tail_center, tail_offset, bump, gamma, delta, reg_factor)
+
+    per_axiom = gci2_score(data, class_center, class_offset, head_center, head_offset, tail_center, tail_offset, bump, gamma, delta)
+    reg_term = 0  # the bump regularisation term is fixed to zero in this function
+    return per_axiom.square().mean() + reg_term
+        
+        
+def gci2_loss_neg(data, class_center, class_offset, head_center, head_offset, tail_center, tail_offset, bump, gamma, delta, reg_factor):
 
     def minimal_distance(box_a, box_b, gamma):
         dist = box_distance(box_a, box_b)
@@ -138,20 +160,18 @@ def minimal_distance(box_a, box_b, gamma):
     bumped_c = (center_c + bump_d, offset_c)
     bumped_d = (center_d + bump_c, offset_d)
     
-    fist_part = (delta - minimal_distance(bumped_c, box_head, gamma))**2
-    second_part = (delta - minimal_distance(bumped_d, box_tail, gamma))**2
+    first_part = (delta - minimal_distance(bumped_c, box_head, gamma)).square().mean()
+    second_part = (delta - minimal_distance(bumped_d, box_tail, gamma)).square().mean()
 
-    loss = fist_part + second_part
-    reg_loss = reg_factor * th.linalg.norm(bump.weight, dim=1).sum()
+    loss = first_part + second_part
+    reg_loss = 0#reg_factor * th.linalg.norm(bump.weight, dim=1).mean()
     return loss + reg_loss
 
 
-def gci3_loss(data, class_center, class_offset, head_center, head_offset, tail_center, tail_offset, bump, gamma, reg_factor, neg=False):
-
+def gci3_score(data, class_center, class_offset, head_center, head_offset, tail_center, tail_offset, bump, gamma):
     center_d = class_center(data[:, 2])
     offset_d = th.abs(class_offset(data[:, 2]))
 
-    
     center_head = head_center(data[:, 0])
     offset_head = th.abs(head_offset(data[:, 0]))
 
@@ -159,13 +179,31 @@ def gci3_loss(data, class_center, class_offset, head_center, head_offset, tail_c
 
     bumped_head = (center_head - bump_c, offset_head)
     box_d = (center_d, offset_d)
-    loss = inclusion_loss(bumped_head, box_d, gamma)
-    reg_loss = reg_factor * th.linalg.norm(bump.weight, dim=1).sum()
-    return loss + reg_loss
+    score = inclusion_score(bumped_head, box_d, gamma)
+    return score
 
+def gci3_loss(data, class_center, class_offset, head_center, head_offset, tail_center, tail_offset, bump, gamma, reg_factor, neg=False):
+    score = gci3_score(data, class_center, class_offset, head_center, head_offset, tail_center, tail_offset, bump, gamma)
+    loss = score.square().mean()
+    reg_loss =0# reg_factor * th.linalg.norm(bump.weight, dim=1).mean()
 
-def gci3_bot_loss(data, head_offset):
+    return loss + reg_loss
+
+def gci3_bot_score(data, head_offset):
 
     offset_head = th.abs(head_offset(data[:, 0]))
-    loss = th.linalg.norm(offset_head, dim=1)
+    score = th.linalg.norm(offset_head, dim=1)
+    return score
+
+def gci3_bot_loss(data, head_offset):
+    score = gci3_bot_score(data, head_offset)
+    loss = score.square().mean()
     return loss
+
+
+def reg_loss(bump, reg_factor):
+    reg_loss = reg_factor * th.linalg.norm(bump.weight, dim=1).mean()
+    return reg_loss
+
+
+
diff --git a/mowl/nn/el/boxsquaredel/module.py b/mowl/nn/el/boxsquaredel/module.py
index 221dd22d..663b381a 100644
--- a/mowl/nn/el/boxsquaredel/module.py
+++ b/mowl/nn/el/boxsquaredel/module.py
@@ -60,3 +60,12 @@ def gci3_loss(self, data, neg=False):
 
     def gci3_bot_loss(self, data, neg=False):
         return L.gci3_bot_loss(data, self.head_offset)
+
+
+    def gci2_score(self, data):
+        return L.gci2_score(data, self.class_center, self.class_offset, self.head_center,
+                            self.head_offset, self.tail_center, self.tail_offset, self.bump,
+                           self.gamma, self.delta)
+
+    def regularization_loss(self):
+        return L.reg_loss(self.bump, self.reg_factor)
diff --git a/mowl/nn/el/elbox/losses.py b/mowl/nn/el/elbox/losses.py
index 553b5ed6..28739e55 100644
--- a/mowl/nn/el/elbox/losses.py
+++ b/mowl/nn/el/elbox/losses.py
@@ -67,6 +67,8 @@ def gci2_loss(data, class_embed, class_offset, rel_embed, margin, neg=False):
         dst = th.reshape(th.linalg.norm(th.relu(euc + off_c - off_d + margin), axis=1), [-1, 1])
         return dst
 
+def gci2_score(*args, **kwargs):
+    return gci2_loss(*args, **kwargs)
 
 def gci2_loss_neg(data, class_embed, class_offset, rel_embed, margin):
     c = class_embed(data[:, 0])
diff --git a/mowl/nn/el/elbox/module.py b/mowl/nn/el/elbox/module.py
index e51d8ab1..ae3b0362 100644
--- a/mowl/nn/el/elbox/module.py
+++ b/mowl/nn/el/elbox/module.py
@@ -50,6 +50,10 @@ def gci2_loss(self, data, neg=False):
         return L.gci2_loss(data, self.class_embed, self.class_offset, self.rel_embed,
                            self.margin, neg=neg)
 
+    def gci2_score(self, data):
+        return L.gci2_score(data, self.class_embed, self.class_offset, self.rel_embed,
+                            self.margin)
+    
     def gci3_loss(self, data, neg=False):
         return L.gci3_loss(data, self.class_embed, self.class_offset, self.rel_embed,
                            self.margin, neg=neg)
diff --git a/mowl/nn/el/elem/losses.py b/mowl/nn/el/elem/losses.py
index d085af70..1f4d8ee0 100644
--- a/mowl/nn/el/elem/losses.py
+++ b/mowl/nn/el/elem/losses.py
@@ -2,20 +2,20 @@
 import numpy as np
 
 
-def gci0_loss(data, class_embed, class_rad, class_reg, margin, neg=False):
+def gci0_loss(data, class_embed, class_rad, margin, neg=False):
     c = class_embed(data[:, 0])
     d = class_embed(data[:, 1])
     rc = th.abs(class_rad(data[:, 0]))
     rd = th.abs(class_rad(data[:, 1]))
     dist = th.linalg.norm(c - d, dim=1, keepdim=True) + rc - rd
     loss = th.relu(dist - margin)
-    return loss + class_reg(c) + class_reg(d)
+    return loss 
 
 def gci0_bot_loss(data, class_rad, neg=False):
     rc = class_rad(data[:, 0])
     return rc
 
-def gci1_loss(data, class_embed, class_rad, class_reg, margin, neg=False):
+def gci1_loss(data, class_embed, class_rad, margin, neg=False):
     c = class_embed(data[:, 0])
     d = class_embed(data[:, 1])
     e = class_embed(data[:, 2])
@@ -29,10 +29,10 @@ def gci1_loss(data, class_embed, class_rad, class_reg, margin, neg=False):
     dst3 = th.linalg.norm(e - d, dim=1, keepdim=True)
     loss = (th.relu(dst - sr - margin) + th.relu(dst2 - rc - margin) + th.relu(dst3 - rd - margin))
 
-    return loss + class_reg(c) + class_reg(d) + class_reg(e)
+    return loss 
 
 
-def gci1_bot_loss(data, class_embed, class_rad, class_reg, margin, neg=False):
+def gci1_bot_loss(data, class_embed, class_rad, margin, neg=False):
     c = class_embed(data[:, 0])
     d = class_embed(data[:, 1])
     rc = class_rad(data[:, 0])
@@ -40,30 +40,34 @@ def gci1_bot_loss(data, class_embed, class_rad, class_reg, margin, neg=False):
 
     sr = rc + rd
     dst = th.reshape(th.linalg.norm(d - c, axis=1), [-1, 1])
-    return th.relu(sr - dst + margin) + class_reg(c) + class_reg(d)
+    return th.relu(sr - dst + margin) 
 
 
-def gci2_loss(data, class_embed, class_rad, rel_embed, class_reg, margin, neg=False):
+def gci2_score(data, class_embed, class_rad, rel_embed, margin):
+    # C subClassOf R some D
+    c = class_embed(data[:, 0])
+    rE = rel_embed(data[:, 1])
+    d = class_embed(data[:, 2])
 
-    if neg:
-        return gci2_loss_neg(data, class_embed, class_rad, rel_embed, class_reg, margin)
+    rc = th.abs(class_rad(data[:, 0]))
+    rd = th.abs(class_rad(data[:, 2]))
+    # c should intersect with d + r
 
-    else:
-        # C subClassOf R some D
-        c = class_embed(data[:, 0])
-        rE = rel_embed(data[:, 1])
-        d = class_embed(data[:, 2])
+    dst = th.linalg.norm(c + rE - d, dim=1, keepdim=True)
+    score = th.relu(dst + rc - rd - margin) + 10e-6
+    return score
+    
+def gci2_loss(data, class_embed, class_rad, rel_embed, margin, neg=False):
 
-        rc = th.abs(class_rad(data[:, 0]))
-        rd = th.abs(class_rad(data[:, 2]))
-        # c should intersect with d + r
+    if neg:
+        return gci2_loss_neg(data, class_embed, class_rad, rel_embed, margin)
 
-        dst = th.linalg.norm(c + rE - d, dim=1, keepdim=True)
-        loss = th.relu(dst + rc - rd - margin)
-        return loss + class_reg(c) + class_reg(d)
+    else:
+        score = gci2_score(data, class_embed, class_rad, rel_embed, margin)
+        return score 
 
 
-def gci2_loss_neg(data, class_embed, class_rad, rel_embed, class_reg, margin):
+def gci2_loss_neg(data, class_embed, class_rad, rel_embed, margin):
     # C subClassOf R some D
     c = class_embed(data[:, 0])
     rE = rel_embed(data[:, 1])
@@ -75,10 +79,10 @@ def gci2_loss_neg(data, class_embed, class_rad, rel_embed, class_reg, margin):
 
     dst = th.linalg.norm(c + rE - d, dim=1, keepdim=True)
     loss = th.relu(rc + rd - dst + margin)
-    return loss + class_reg(c) + class_reg(d)
+    return loss 
 
 
-def gci3_loss(data, class_embed, class_rad, rel_embed, class_reg, margin, neg=False):
+def gci3_loss(data, class_embed, class_rad, rel_embed, margin, neg=False):
     # R some C subClassOf D
     rE = rel_embed(data[:, 0])
     c = class_embed(data[:, 1])
@@ -88,8 +92,15 @@ def gci3_loss(data, class_embed, class_rad, rel_embed, class_reg, margin, neg=Fa
 
     euc = th.linalg.norm(c - rE - d, dim=1, keepdim=True)
     loss = th.relu(euc - rc - rd - margin)
-    return loss + class_reg(c) + class_reg(d)
+    return loss 
 
 def gci3_bot_loss(data, class_rad, neg=False):
     rc = class_rad(data[:, 1])
     return rc
+
+
+def regularization_loss(class_embed, reg_factor):
+    res = th.abs(th.linalg.norm(class_embed.weight, axis=1) - reg_factor).mean()
+    # res = th.reshape(res, [-1, 1])
+    return res
+
diff --git a/mowl/nn/el/elem/module.py b/mowl/nn/el/elem/module.py
index 9d589566..25d50361 100644
--- a/mowl/nn/el/elem/module.py
+++ b/mowl/nn/el/elem/module.py
@@ -39,34 +39,35 @@ def __init__(self, nb_ont_classes, nb_rels, embed_dim=50, margin=0.1, reg_norm=1
 
         self.margin = margin
 
-    def class_reg(self, x):
-        # force n-ball to be inside unit ball
-        res = th.abs(th.linalg.norm(x, axis=1) - self.reg_norm)
-        res = th.reshape(res, [-1, 1])
-        return res
-
     def gci0_loss(self, data, neg=False):
-        return L.gci0_loss(data, self.class_embed, self.class_rad, self.class_reg, self.margin,
+        return L.gci0_loss(data, self.class_embed, self.class_rad, self.margin,
                            neg=neg)
 
     def gci0_bot_loss(self, data, neg=False):
         return L.gci0_bot_loss(data, self.class_rad)
         
     def gci1_loss(self, data, neg=False):
-        return L.gci1_loss(data, self.class_embed, self.class_rad, self.class_reg, self.margin,
+        return L.gci1_loss(data, self.class_embed, self.class_rad, self.margin,
                            neg=neg)
 
     def gci1_bot_loss(self, data, neg=False):
-        return L.gci1_bot_loss(data, self.class_embed, self.class_rad, self.class_reg, self.margin,
+        return L.gci1_bot_loss(data, self.class_embed, self.class_rad, self.margin,
                                neg=neg)
 
     def gci2_loss(self, data, neg=False, idxs_for_negs=None):
-        return L.gci2_loss(data, self.class_embed, self.class_rad, self.rel_embed, self.class_reg,
+        return L.gci2_loss(data, self.class_embed, self.class_rad, self.rel_embed,
                            self.margin, neg=neg)
 
     def gci3_loss(self, data, neg=False):
-        return L.gci3_loss(data, self.class_embed, self.class_rad, self.rel_embed, self.class_reg,
+        return L.gci3_loss(data, self.class_embed, self.class_rad, self.rel_embed,
                            self.margin, neg=neg)
 
     def gci3_bot_loss(self, data, neg=False):
         return L.gci3_bot_loss(data, self.class_rad)
+
+
+    def gci2_score(self, data):
+        return L.gci2_score(data, self.class_embed, self.class_rad, self.rel_embed, self.margin)
+
+    def regularization_loss(self):
+        return L.regularization_loss(self.class_embed, self.reg_norm)
diff --git a/setup.py b/setup.py
index 6ed22fd6..5a2d60a2 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@
 
 setuptools.setup(
     name="mowl-borg",
-    version="0.3.0",
+    version="0.4.0",
     author="Bio-Ontology Research Group",
     author_email="fernando.zhapacamacho@kaust.edu.sa",
     description="mOWL: A machine learning library with ontologies",
diff --git a/tests/__init__.py b/tests/__init__.py
index ae238316..543410a5 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -21,15 +21,15 @@ def setUpModule():
     PPIYeastSlimDataset()
 
 
-def tearDownModule():
-    os.remove('ppi_yeast_slim.tar.gz')
-    os.remove('gda_human_el.tar.gz')
-    os.remove('gda_mouse_el.tar.gz')
-    os.remove('family.tar.gz')
-    shutil.rmtree('ppi_yeast_slim')
-    shutil.rmtree('gda_human_el')
-    shutil.rmtree('gda_mouse_el')
-    shutil.rmtree('family')
+# def tearDownModule():
+    # os.remove('ppi_yeast_slim.tar.gz')
+    # os.remove('gda_human_el.tar.gz')
+    # os.remove('gda_mouse_el.tar.gz')
+    # os.remove('family.tar.gz')
+    # shutil.rmtree('ppi_yeast_slim')
+    # shutil.rmtree('gda_human_el')
+    # shutil.rmtree('gda_mouse_el')
+    # shutil.rmtree('family')
 
 
 
diff --git a/tests/conftest.py b/tests/conftest.py
index 1a2f8c55..cffa898b 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -22,15 +22,15 @@ def pytest_configure(config):
     PPIYeastSlimDataset()
 
 
-def pytest_unconfigure(config):
-    os.remove('ppi_yeast_slim.tar.gz')
-    os.remove('gda_human_el.tar.gz')
-    os.remove('gda_mouse_el.tar.gz')
-    os.remove('family.tar.gz')
-    shutil.rmtree('ppi_yeast_slim')
-    shutil.rmtree('gda_human_el')
-    shutil.rmtree('gda_mouse_el')
-    shutil.rmtree('family')
+# def pytest_unconfigure(config):
+    # os.remove('ppi_yeast_slim.tar.gz')
+    # os.remove('gda_human_el.tar.gz')
+    # os.remove('gda_mouse_el.tar.gz')
+    # os.remove('family.tar.gz')
+    # shutil.rmtree('ppi_yeast_slim')
+    # shutil.rmtree('gda_human_el')
+    # shutil.rmtree('gda_mouse_el')
+    # shutil.rmtree('family')
 
 
 
diff --git a/tests/inductive/test_semantic_model.py b/tests/inductive/test_semantic_model.py
index c80d90f4..57fd3e11 100644
--- a/tests/inductive/test_semantic_model.py
+++ b/tests/inductive/test_semantic_model.py
@@ -59,17 +59,17 @@ def test_from_pretrained(self):
         
             
     def test_train_after_pretrained(self):
-        first_model = ELEmPPI(self.ppi_dataset, model_filepath="first_kge_model", epochs=3)
-        first_model.train()
+        first_model = ELEmPPI(self.ppi_dataset, model_filepath="first_semantic_model", epochs=3)
+        first_model.train(validate_every=1)
 
-        first_kge_model = first_model.model_filepath
+        first_semantic_model = first_model.model_filepath
         
-        self.assertTrue(os.path.exists(first_kge_model))
+        self.assertTrue(os.path.exists(first_semantic_model))
 
         second_model = ELEmPPI(self.ppi_dataset, epochs=2)
-        second_model.from_pretrained(first_kge_model)
+        second_model.from_pretrained(first_semantic_model)
 
-        self.assertNotEqual(second_model.model_filepath, first_kge_model)
+        self.assertNotEqual(second_model.model_filepath, first_semantic_model)
         second_model.train()