Skip to content

Commit

Permalink
Merge pull request #68 from bio-ontology-research-group/box2el
Browse files Browse the repository at this point in the history
EL Geometric Models
  • Loading branch information
ferzcam authored Mar 20, 2024
2 parents 0d95f06 + e8b57c8 commit d6dd4db
Show file tree
Hide file tree
Showing 24 changed files with 476 additions and 238 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Deprecated
### Removed
### Fixed
- Fix bug in GCI2 score for ELEmbeddings.
- Fix bottleneck in ELBE example for PPI.
- Fix bugs in BoxSquaredEL model.

### Security

## [0.3.0]
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ git clone https://github.com/bio-ontology-research-group/mowl.git
cd mowl
conda env create -f envs/environment_3.8.yml
conda env create -f envs/environment_3_8.yml
conda activate mowl
./build_jars.sh
Expand Down
2 changes: 1 addition & 1 deletion examples/elmodels/plot_1_elembeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
# ":math:`p_1` interacts with :math:`p_2`" is encoded using GCI 2 as:
#
# .. math::
# p_1 \sqsubseteq interacts\_with. p_2
# p_1 \sqsubseteq \exists interacts\_with. p_2
#
# For that, we can use the class :class:`mowl.models.elembeddings.examples.model_ppi.ELEmPPI` model, which uses the :class:`mowl.datasets.builtin.PPIYeastSlimDataset` dataset.

Expand Down
2 changes: 1 addition & 1 deletion examples/elmodels/plot_2_elboxembeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
# ":math:`p_1` interacts with :math:`p_2`" is encoded using GCI 2 as:
#
# .. math::
# p_1 \sqsubseteq interacts\_with. p_2
# p_1 \sqsubseteq \exists interacts\_with. p_2
#
# For that, we can use the class :class:`mowl.models.elboxembeddings.examples.model_ppi.ELBoxPPI` model, which uses the :class:`mowl.datasets.builtin.PPIYeastSlimDataset` dataset.

Expand Down
45 changes: 44 additions & 1 deletion mowl/base_models/elmodel.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from mowl.ontology.normalize import ELNormalizer
from mowl.base_models.model import Model
from mowl.datasets.el import ELDataset
from mowl.projection import projector_factory
import torch as th
from torch.utils.data import DataLoader, default_collate
from mowl.datasets.el import ELDataset

from deprecated.sphinx import versionadded

from org.semanticweb.owlapi.model import OWLClassExpression, OWLClass, OWLObjectSomeValuesFrom, OWLObjectIntersectionOf
Expand Down Expand Up @@ -48,6 +50,7 @@ def __init__(self, dataset, embed_dim, batch_size, extended=True, model_filepath
self._validation_datasets = None
self._testing_datasets = None

self._loaded_eval = False

def init_module(self):
    """Create the model's underlying module.

    This base implementation is a placeholder: it always raises, so a
    concrete model has to provide its own ``init_module``.

    :raises NotImplementedError: always.
    """
    raise NotImplementedError()
Expand Down Expand Up @@ -379,3 +382,43 @@ def from_pretrained(self, model):
#self._kge_method = kge_method




def load_pairwise_eval_data(self):
    """Lazily prepare the data needed for pairwise (head, tail) evaluation.

    The first call reads the evaluation property and the evaluation classes
    from ``self.dataset``, keeps the ``as_str`` form of the head and tail
    classes, and projects ``self.dataset.ontology`` and
    ``self.dataset.testing`` with a ``'taxonomy_rels'`` projector restricted
    to the evaluation property. The results are cached on the instance and
    every later call is a no-op.
    """
    if not self._loaded_eval:
        relation = self.dataset.get_evaluation_property()

        heads, tails = self.dataset.evaluation_classes
        self._head_entities = heads.as_str
        self._tail_entities = tails.as_str

        # Only edges of the evaluation relation are kept; taxonomy edges
        # are explicitly switched off.
        projector = projector_factory(
            'taxonomy_rels',
            taxonomy=False,
            relations=[relation],
        )
        self._training_set = projector.project(self.dataset.ontology)
        self._testing_set = projector.project(self.dataset.testing)

        # Mark as loaded only after everything above succeeded.
        self._loaded_eval = True


@property
def training_set(self):
    """Projection of ``self.dataset.ontology`` onto the evaluation property.

    The pairwise evaluation data is loaded on first access.
    """
    self.load_pairwise_eval_data()
    projected_training = self._training_set
    return projected_training

@property
def testing_set(self):
    """Projection of ``self.dataset.testing`` onto the evaluation property.

    The pairwise evaluation data is loaded on first access.
    """
    self.load_pairwise_eval_data()
    projected_testing = self._testing_set
    return projected_testing

@property
def head_entities(self):
    """String form (``as_str``) of the dataset's head evaluation classes.

    The pairwise evaluation data is loaded on first access.
    """
    self.load_pairwise_eval_data()
    heads = self._head_entities
    return heads

@property
def tail_entities(self):
    """String form (``as_str``) of the dataset's tail evaluation classes.

    The pairwise evaluation data is loaded on first access.
    """
    self.load_pairwise_eval_data()
    tails = self._tail_entities
    return tails
3 changes: 2 additions & 1 deletion mowl/evaluation/rank_based.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def load_training_scores(self):
c, d = self.head_name_indexemb[c], self.tail_name_indexemb[d]
c, d = self.head_indexemb_indexsc[c], self.tail_indexemb_indexsc[d]

self.training_scores[c, d] = 1000000
self.training_scores[c, d] = 10000

logging.info("Training scores created")
self._loaded_tr_scores = True
Expand Down Expand Up @@ -231,6 +231,7 @@ def activation(x):
print(f'Hits@100: {top100:.2f} Filtered: {ftop100:.2f}')
print(f'MR: {mean_rank:.2f} Filtered: {fmean_rank:.2f}')
print(f'AUC: {rank_auc:.2f} Filtered: {frank_auc:.2f}')
print(f"Tail entities: {num_tail_entities}")

self.metrics = {
"hits@1": top1,
Expand Down
2 changes: 2 additions & 0 deletions mowl/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from mowl.models.elboxembeddings.examples.model_ppi import ELBoxPPI
from mowl.models.elboxembeddings.examples.model_gda import ELBoxGDA

from mowl.models.boxsquaredel.model import BoxSquaredEL

from mowl.models.graph_random_walk.random_walk_w2v_model import RandomWalkPlusW2VModel
from mowl.models.graph_kge.graph_pykeen_model import GraphPlusPyKEENModel
from mowl.models.syntactic.w2v_model import SyntacticPlusW2VModel
Empty file.
130 changes: 130 additions & 0 deletions mowl/models/boxsquaredel/evaluate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
from mowl.evaluation.base import AxiomsRankBasedEvaluator
from mowl.projection.factory import projector_factory
from mowl.projection.edge import Edge
import logging
import numpy as np
from scipy.stats import rankdata
import torch as th


class BoxSquaredELPPIEvaluator(AxiomsRankBasedEvaluator):
    """Rank-based evaluator for ``interacts_with`` axioms.

    Each test axiom is projected to an edge ``(c, r, d)``. The head ``c`` is
    scored against every known tail entity with ``eval_method`` and the
    position of the true tail ``d`` in that ranking is reported, both raw
    and filtered (pairs found in ``axioms_to_filter`` are penalised).

    :param axioms: Axioms to evaluate; forwarded to the base evaluator.
    :param eval_method: Callable that receives a tensor of
        ``[head_idx, relation_idx, tail_idx]`` rows and returns one score
        per row.
    :param axioms_to_filter: Axioms used to build the filtering matrix;
        forwarded to the base evaluator.
    :param class_name_indexemb: Mapping from class name to embedding index.
    :param rel_name_indexemb: Mapping from relation name to embedding index.
    :param device: Device the scoring tensor is moved to. Defaults to
        ``"cpu"``.
    :param verbose: Forwarded to the base evaluator.
    """

    def __init__(
            self,
            axioms,
            eval_method,
            axioms_to_filter,
            class_name_indexemb,
            rel_name_indexemb,
            device="cpu",
            verbose=False
    ):

        super().__init__(axioms, eval_method, axioms_to_filter, device, verbose)

        self.class_name_indexemb = class_name_indexemb
        self.relation_name_indexemb = rel_name_indexemb

        self._loaded_training_scores = False
        self._loaded_eval_data = False
        self._loaded_ht_data = False

    def _load_head_tail_entities(self):
        """Collect the head/tail entities and their index mappings (once).

        Entities are taken from both the evaluation edges and the filter
        edges; those without an entry in ``class_name_indexemb`` are logged
        and ignored. Heads and tails are drawn from the same pool.
        """
        if self._loaded_ht_data:
            return

        ents, _ = Edge.getEntitiesAndRelations(self.axioms)
        ents_filter, _ = Edge.getEntitiesAndRelations(self.axioms_to_filter)

        entities = list(set(ents) | set(ents_filter))

        # Heads and tails use the same membership criterion, so the pool is
        # scanned once and each unknown entity is reported a single time.
        known_entities = []
        for e in entities:
            if e in self.class_name_indexemb:
                known_entities.append(e)
            else:
                logging.info("Entity %s not present in the embeddings dictionary. Ignoring it.", e)

        self.head_entities = set(known_entities)
        self.tail_entities = set(known_entities)

        # name -> embedding index
        self.head_name_indexemb = {k: self.class_name_indexemb[k] for k in self.head_entities}
        self.tail_name_indexemb = {k: self.class_name_indexemb[k] for k in self.tail_entities}

        # embedding index -> row/column position in the scores matrix
        self.head_indexemb_indexsc = {v: k for k, v in enumerate(self.head_name_indexemb.values())}
        self.tail_indexemb_indexsc = {v: k for k, v in enumerate(self.tail_name_indexemb.values())}

        self._loaded_ht_data = True

    def _load_training_scores(self):
        """Return the (heads x tails) multiplier matrix used for filtering.

        Every cell starts at 1. When filtered metrics are requested, cells
        whose (head, tail) pair occurs in ``axioms_to_filter`` are set to
        10000 so that multiplying a score by the cell pushes that pair down
        the ascending ranking.
        NOTE(review): this relies on scores being positive with lower
        meaning better -- confirm against ``eval_method``.
        """
        if self._loaded_training_scores:
            return self.training_scores

        self._load_head_tail_entities()

        training_scores = np.ones((len(self.head_entities), len(self.tail_entities)),
                                  dtype=np.int32)

        # NOTE(review): _compute_filtered_metrics is not set in this class;
        # presumably provided by the base evaluator.
        if self._compute_filtered_metrics:
            # careful here: c must be in head entities and d must be in tail entities
            for axiom in self.axioms_to_filter:
                c, _, d = axiom.astuple()
                if (c not in self.head_entities) or (d not in self.tail_entities):
                    continue

                c, d = self.head_name_indexemb[c], self.tail_name_indexemb[d]
                c, d = self.head_indexemb_indexsc[c], self.tail_indexemb_indexsc[d]

                training_scores[c, d] = 10000

            logging.info("Training scores created")

        # Store the matrix together with the flag so that the cached branch
        # above never reads an attribute that was not assigned yet.
        self.training_scores = training_scores
        self._loaded_training_scores = True
        return training_scores

    def _init_axioms(self, axioms):
        """Project ``axioms`` onto ``http://interacts_with`` edges.

        :returns: The list of projected edges, or ``None`` when ``axioms``
            is ``None``.
        """
        if axioms is None:
            return None

        projector = projector_factory("taxonomy_rels", relations=["http://interacts_with"])

        edges = projector.project(axioms)
        return edges  # List of Edges

    def compute_axiom_rank(self, axiom):
        """Rank the true tail of ``axiom`` among all tail entities.

        :param axiom: Edge exposing ``astuple()`` -> ``(head, relation, tail)``.
        :returns: ``(rank, filtered_rank, number_of_tail_entities)``, or
            ``(None, None, None)`` when the head or the tail is not a known
            entity.
        """
        self.training_scores = self._load_training_scores()

        c, r, d = axiom.astuple()

        if (c not in self.head_entities) or (d not in self.tail_entities):
            return None, None, None

        # Embedding indices
        c_emb_idx, d_emb_idx = self.head_name_indexemb[c], self.tail_name_indexemb[d]

        # Scores matrix labels. These used to be a single tuple assignment
        # broken across two lines without a continuation, which evaluated to
        # a 1-tuple and failed to unpack.
        c_sc_idx = self.head_indexemb_indexsc[c_emb_idx]
        d_sc_idx = self.tail_indexemb_indexsc[d_emb_idx]

        r = self.relation_name_indexemb[r]

        # One row per candidate tail, iterated in the same order that was
        # used to build tail_indexemb_indexsc.
        data = th.tensor([
            [c_emb_idx, r, self.tail_name_indexemb[x]] for x in
            self.tail_entities]).to(self.device)

        res = self.eval_method(data).squeeze().cpu().detach().numpy()

        # rankdata assigns rank 1 to the smallest score.
        index = rankdata(res, method='average')
        rank = index[d_sc_idx]

        findex = rankdata((res * self.training_scores[c_sc_idx, :]), method='average')
        frank = findex[d_sc_idx]

        return rank, frank, len(self.tail_entities)
77 changes: 77 additions & 0 deletions mowl/models/boxsquaredel/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@

from mowl.nn import BoxSquaredELModule
from mowl.base_models.elmodel import EmbeddingELModel
from mowl.models.boxsquaredel.evaluate import BoxSquaredELPPIEvaluator
import torch as th
from torch import nn


class BoxSquaredEL(EmbeddingELModel):
    """
    Implementation based on [peng2020]_.

    :param dataset: Dataset forwarded to the base EL model.
    :param embed_dim: Embedding dimension, forwarded to the base model and
        used to size the module. Defaults to ``50``.
    :param margin: Passed to the module as ``gamma``. Defaults to ``0.02``.
    :param reg_norm: Stored on the instance; not read by this class.
        Defaults to ``1``.
    :param learning_rate: Stored for use by a training loop. Defaults to
        ``0.001``.
    :param epochs: Stored for use by a training loop. Defaults to ``1000``.
    :param batch_size: Forwarded to the base model. Defaults to
        ``4096 * 8``.
    :param delta: Passed to the module as ``delta``. Defaults to ``2.5``.
    :param reg_factor: Passed to the module as ``reg_factor``. Defaults to
        ``0.2``.
    :param num_negs: Stored on the instance; not read by this class.
        Defaults to ``4``.
    :param model_filepath: Forwarded to the base model; the path checkpoints
        are loaded from in :meth:`load_best_model`.
    :param device: Device the module is moved to. Defaults to ``'cpu'``.
    """

    def __init__(self,
                 dataset,
                 embed_dim=50,
                 margin=0.02,
                 reg_norm=1,
                 learning_rate=0.001,
                 epochs=1000,
                 batch_size=4096 * 8,
                 delta=2.5,
                 reg_factor=0.2,
                 num_negs=4,
                 model_filepath=None,
                 device='cpu'
                 ):
        super().__init__(dataset, embed_dim, batch_size, extended=True, model_filepath=model_filepath)

        self.margin = margin
        self.reg_norm = reg_norm
        self.delta = delta
        self.reg_factor = reg_factor
        self.num_negs = num_negs
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.device = device
        self._loaded = False
        # NOTE(review): the base class is constructed with extended=True and
        # the flag is then reset here -- confirm this is intentional.
        self.extended = False
        self.init_module()

    def init_module(self):
        """Build a fresh ``BoxSquaredELModule`` and move it to ``self.device``.

        The module is sized from the class and object-property index
        dictionaries (presumably populated by the base model -- they are not
        set in this class).
        """
        self.module = BoxSquaredELModule(
            len(self.class_index_dict),
            len(self.object_property_index_dict),
            embed_dim=self.embed_dim,
            gamma=self.margin,
            delta=self.delta,
            reg_factor=self.reg_factor

        ).to(self.device)

    def train(self):
        """Training is not provided by this class; subclasses implement it.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError

    def eval_method(self, data):
        """Score ``data`` with the module's GCI2 scoring function."""
        return self.module.gci2_score(data)

    def get_embeddings(self):
        """Load the saved model and return its embeddings.

        :returns: ``(ent_embeds, rel_embeds)``: two dictionaries pairing the
            keys of the class / object-property index dictionaries, in
            iteration order, with the rows of the corresponding embedding
            matrices as NumPy arrays.
        """
        print('Load the best model', self.model_filepath)
        # load_best_model() rebuilds the module itself, so no separate
        # init_module() call is needed here.
        self.load_best_model()

        ent_embeds = {k: v for k, v in zip(self.class_index_dict.keys(),
                                           self.module.class_embed.weight.cpu().detach().numpy())}
        rel_embeds = {k: v for k, v in zip(self.object_property_index_dict.keys(),
                                           self.module.rel_embed.weight.cpu().detach().numpy())}
        return ent_embeds, rel_embeds

    def load_best_model(self):
        """Re-create the module and restore weights from ``model_filepath``.

        The checkpoint is mapped onto ``self.device`` while loading, so a
        state dict saved on another device can still be restored. The module
        is left in evaluation mode.
        """
        self.init_module()
        state_dict = th.load(self.model_filepath, map_location=self.device)
        self.module.load_state_dict(state_dict)
        self.module.eval()

Loading

0 comments on commit d6dd4db

Please sign in to comment.