From 4583e70277408cf0f8e8aa30e17c61713d1d1574 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Mon, 12 Dec 2022 16:37:05 +0100 Subject: [PATCH 1/6] Depend on main REL and get tests to pass --- .gitignore | 134 ++++++++++++++++++ tool/REL | 1 - tool/conversational_entity_linking.ipynb | 25 +++- tool/crel/__init__.py | 0 tool/{ => crel}/bert_md.py | 0 tool/{ => crel}/conv_el.py | 32 +++-- tool/{ => crel}/rel_ed.py | 1 - tool/crel/s2e_pe/__init__.py | 0 tool/{ => crel}/s2e_pe/consts.py | 0 .../s2e_pe/coref_bucket_batch_sampler.py | 0 tool/{ => crel}/s2e_pe/data.py | 4 +- tool/{ => crel}/s2e_pe/modeling.py | 2 +- tool/{ => crel}/s2e_pe/pe.py | 22 +-- tool/{ => crel}/s2e_pe/pe_data.py | 0 tool/{ => crel}/s2e_pe/utils.py | 2 +- tool/requirements.txt | 1 + tool/setup.cfg | 2 + tool/setup.py | 2 + tool/tests/test_crel.py | 63 ++++++++ 19 files changed, 258 insertions(+), 33 deletions(-) create mode 100644 .gitignore delete mode 160000 tool/REL create mode 100644 tool/crel/__init__.py rename tool/{ => crel}/bert_md.py (100%) rename tool/{ => crel}/conv_el.py (87%) rename tool/{ => crel}/rel_ed.py (98%) create mode 100644 tool/crel/s2e_pe/__init__.py rename tool/{ => crel}/s2e_pe/consts.py (100%) rename tool/{ => crel}/s2e_pe/coref_bucket_batch_sampler.py (100%) rename tool/{ => crel}/s2e_pe/data.py (98%) rename tool/{ => crel}/s2e_pe/modeling.py (99%) rename tool/{ => crel}/s2e_pe/pe.py (95%) rename tool/{ => crel}/s2e_pe/pe_data.py (100%) rename tool/{ => crel}/s2e_pe/utils.py (99%) create mode 100644 tool/requirements.txt create mode 100644 tool/setup.cfg create mode 100644 tool/setup.py create mode 100644 tool/tests/test_crel.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f42747d --- /dev/null +++ b/.gitignore @@ -0,0 +1,134 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +coverage.json +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pyvenv.cfg + +# Spyder project settings +.spyderproject +.spyproject + +# vscode +.vscode/ + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/tool/REL b/tool/REL deleted file mode 160000 index 9ca253b..0000000 --- a/tool/REL +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 9ca253b1d371966c39219ed672f39784fd833d8d diff --git a/tool/conversational_entity_linking.ipynb b/tool/conversational_entity_linking.ipynb index c9d486c..2ff77c2 100644 --- a/tool/conversational_entity_linking.ipynb +++ b/tool/conversational_entity_linking.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -12,9 +12,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "AssertionError", + "evalue": "PE Linking model folder not found. Please download the model file following the instructions in the README.md.", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mAssertionError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn [3], line 4\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# Confirm the existence of the models\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m# assert os.path.exists('./bert_conv-td'), 'MD model file not found. Please download the model file following the instructions in the README.md.'\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# assert os.path.exists('./rel_conv_project_folder'), 'ED model file not found. Please download the model file following the instructions in the README.md.'\u001b[39;00m\n\u001b[1;32m----> 4\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mexists(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m./s2e_pe/model/s2e_ast_onto\u001b[39m\u001b[38;5;124m'\u001b[39m), \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPE Linking model folder not found. Please download the model file following the instructions in the README.md.\u001b[39m\u001b[38;5;124m'\u001b[39m\n", + "\u001b[1;31mAssertionError\u001b[0m: PE Linking model folder not found. Please download the model file following the instructions in the README.md." + ] + } + ], "source": [ "# Confirm the existence of the models\n", "assert os.path.exists('./bert_conv-td'), 'MD model file not found. Please download the model file following the instructions in the README.md.'\n", @@ -112,7 +124,7 @@ "hash": "a53556614ea0d196de1dd499c6cd4b1019f00d4a13a34e20ba99029df2a473df" }, "kernelspec": { - "display_name": "Python 3.8.13 ('220429_conel22_github')", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -126,9 +138,8 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" - }, - "orig_nbformat": 4 + "version": "3.9.15" + } }, "nbformat": 4, "nbformat_minor": 2 diff --git a/tool/crel/__init__.py b/tool/crel/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tool/bert_md.py b/tool/crel/bert_md.py similarity index 100% rename from tool/bert_md.py rename to tool/crel/bert_md.py diff --git a/tool/conv_el.py b/tool/crel/conv_el.py similarity index 87% rename from tool/conv_el.py rename to tool/crel/conv_el.py index 6d0ace1..e9e969f 100644 --- a/tool/conv_el.py +++ b/tool/crel/conv_el.py @@ -1,19 +1,22 @@ import sys -sys.path.append('s2e_pe') -import pe_data +from .s2e_pe import pe_data import importlib -from bert_md import BERT_MD -from rel_ed import REL_ED -from pe import EEMD, PEMD +from .bert_md import BERT_MD +from .rel_ed import REL_ED +from .s2e_pe.pe import EEMD, PEMD class ConvEL(): - def __init__(self, threshold=0): + def __init__(self, threshold=0, config=None): self.threshold = threshold - conf = self.ConfigConvEL() + if not config: + config = {} + + conf = self.ConfigConvEL(**config) + self.bert_md = BERT_MD(conf.file_pretrained) self.rel_ed = REL_ED(conf.base_url, conf.wiki_version) - self.eemd = EEMD() + self.eemd = EEMD(config=config) self.pemd = PEMD() self.preprocess = pe_data.PreProcess() @@ -24,13 +27,18 @@ def __init__(self, threshold=0): self.ment2ent = {} # This will be used for PE Linking class ConfigConvEL(): - def __init__(self): + def __init__(self, + file_pretrained = './bert_conv-td', + base_url = './rel_conv_project_folder', + wiki_version='wiki_2019', + **kwargs + ): # MD - self.file_pretrained = './bert_conv-td' + self.file_pretrained = file_pretrained # ED - self.base_url = './rel_conv_project_folder' - self.wiki_version = "wiki_2019" + self.base_url = base_url + self.wiki_version = wiki_version # NOTE: PE Config is in EEMD class diff --git a/tool/rel_ed.py b/tool/crel/rel_ed.py similarity index 98% rename from tool/rel_ed.py rename to tool/crel/rel_ed.py index f8644c1..a82d5c8 100644 --- a/tool/rel_ed.py +++ b/tool/crel/rel_ed.py @@ -1,5 +1,4 @@ import sys -sys.path.append('./REL') from REL.entity_disambiguation import EntityDisambiguation from REL.utils import process_results from REL.mention_detection import MentionDetection diff --git a/tool/crel/s2e_pe/__init__.py b/tool/crel/s2e_pe/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tool/s2e_pe/consts.py b/tool/crel/s2e_pe/consts.py similarity index 100% rename from tool/s2e_pe/consts.py rename to tool/crel/s2e_pe/consts.py diff --git a/tool/s2e_pe/coref_bucket_batch_sampler.py b/tool/crel/s2e_pe/coref_bucket_batch_sampler.py similarity index 100% rename from tool/s2e_pe/coref_bucket_batch_sampler.py rename to tool/crel/s2e_pe/coref_bucket_batch_sampler.py diff --git a/tool/s2e_pe/data.py b/tool/crel/s2e_pe/data.py similarity index 98% rename from tool/s2e_pe/data.py rename to tool/crel/s2e_pe/data.py index 7193834..066e543 100644 --- a/tool/s2e_pe/data.py +++ b/tool/crel/s2e_pe/data.py @@ -6,8 +6,8 @@ import torch -from consts import SPEAKER_START, SPEAKER_END, NULL_ID_FOR_COREF -from utils import flatten_list_of_lists +from .consts import SPEAKER_START, SPEAKER_END, NULL_ID_FOR_COREF +from .utils import flatten_list_of_lists from torch.utils.data import Dataset CorefExample = namedtuple("CorefExample", ["token_ids", "clusters"]) diff --git a/tool/s2e_pe/modeling.py b/tool/crel/s2e_pe/modeling.py similarity index 99% rename from tool/s2e_pe/modeling.py rename to tool/crel/s2e_pe/modeling.py index e47d463..068c343 100644 --- a/tool/s2e_pe/modeling.py +++ b/tool/crel/s2e_pe/modeling.py @@ -5,7 +5,7 @@ from transformers.modeling_bert import ACT2FN except: # If you use `jupyterlab-debugger` from transformers.models.bert.modeling_bert import ACT2FN -from utils import extract_clusters, extract_mentions_to_predicted_clusters_from_clusters, mask_tensor #, ce_get_start_end_subtoken_num +from .utils import extract_clusters, extract_mentions_to_predicted_clusters_from_clusters, mask_tensor #, ce_get_start_end_subtoken_num import os import json diff --git a/tool/s2e_pe/pe.py b/tool/crel/s2e_pe/pe.py similarity index 95% rename from tool/s2e_pe/pe.py rename to tool/crel/s2e_pe/pe.py index a7ae2ab..533c479 100644 --- a/tool/s2e_pe/pe.py +++ b/tool/crel/s2e_pe/pe.py @@ -10,14 +10,14 @@ spacy.cli.download("en_core_web_md") import en_core_web_md nlp = en_core_web_md.load() -from pe_data import PreProcess # to use get_span() +from .pe_data import PreProcess # to use get_span() # EEMD -import data +from . import data import torch from transformers import AutoConfig, AutoTokenizer, LongformerConfig -from modeling import S2E -from coref_bucket_batch_sampler import BucketBatchSampler +from .modeling import S2E +from .coref_bucket_batch_sampler import BucketBatchSampler class PEMD(): @@ -119,16 +119,22 @@ class EEMD(): """Find corresponding explicit entity mention using s2e-coref-based method """ - def __init__(self): - self.conf = self.Config() + def __init__(self, config=None): + if not config: + config = {} + + self.conf = self.Config(**config) self.model = self._read_model() class Config(): """Inner class for config """ - def __init__(self): + def __init__(self, + s2e_pe_model='./s2e_pe/model/s2e_ast_onto', + **kwargs + ): self.max_seq_length = 4096 - self.model_name_or_path = './s2e_pe/model/s2e_ast_onto' + self.model_name_or_path = s2e_pe_model self.max_total_seq_len = 4096 self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # self.device = torch.device("cpu") # TMP diff --git a/tool/s2e_pe/pe_data.py b/tool/crel/s2e_pe/pe_data.py similarity index 100% rename from tool/s2e_pe/pe_data.py rename to tool/crel/s2e_pe/pe_data.py diff --git a/tool/s2e_pe/utils.py b/tool/crel/s2e_pe/utils.py similarity index 99% rename from tool/s2e_pe/utils.py rename to tool/crel/s2e_pe/utils.py index a32e77b..79c325e 100644 --- a/tool/s2e_pe/utils.py +++ b/tool/crel/s2e_pe/utils.py @@ -6,7 +6,7 @@ import torch import numpy as np -from consts import NULL_ID_FOR_COREF +from .consts import NULL_ID_FOR_COREF def flatten_list_of_lists(lst): diff --git a/tool/requirements.txt b/tool/requirements.txt new file mode 100644 index 0000000..33ea94c --- /dev/null +++ b/tool/requirements.txt @@ -0,0 +1 @@ +spacy \ No newline at end of file diff --git a/tool/setup.cfg b/tool/setup.cfg new file mode 100644 index 0000000..2d362d3 --- /dev/null +++ b/tool/setup.cfg @@ -0,0 +1,2 @@ +[metadata] +name = crel diff --git a/tool/setup.py b/tool/setup.py new file mode 100644 index 0000000..8ab824c --- /dev/null +++ b/tool/setup.py @@ -0,0 +1,2 @@ +from setuptools import setup +setup() \ No newline at end of file diff --git a/tool/tests/test_crel.py b/tool/tests/test_crel.py new file mode 100644 index 0000000..9c2b36b --- /dev/null +++ b/tool/tests/test_crel.py @@ -0,0 +1,63 @@ +import os +from crel.conv_el import ConvEL + +from pathlib import Path + +os.environ["CUDA_VISIBLE_DEVICES"]="1" + + +file_pretrained = Path("S:\\rel\\bert_conv-td") +s2e_pe_model = Path("S:\\rel\\s2e_ast_onto") +base_url = Path("S:\\rel\\rel_data") + +CONFIG = { + 'file_pretrained': str(file_pretrained), + 'base_url': str(base_url), + 's2e_pe_model': str(s2e_pe_model), + } + + +def print_results(results): + for res in results: + print(f'{res["speaker"][:4]}: {res["utterance"]}') + if res["speaker"] == 'SYSTEM': continue + for ann in res['annotations']: + print('\t', ann) + + +def test_conv1(): + cel = ConvEL(config=CONFIG) + + example = [ + + {"speaker": "USER", + "utterance": "I think science fiction is an amazing genre for anything. Future science, technology, time travel, FTL travel, they're all such interesting concepts.",}, + + # System turn should not have mentions or pems + {"speaker": "SYSTEM", + "utterance": "Awesome! I really love how sci-fi storytellers focus on political/social/philosophical issues that would still be around even in the future. Makes them relatable.",}, + + {"speaker": "USER", + "utterance": "I agree. One of my favorite forms of science fiction is anything related to time travel! I find it fascinating.",}, + ] + + result = cel.annotate(example) + print_results(result) + +def test_conv2(): + cel = ConvEL(config=CONFIG) + + example = [ + {"speaker": "USER", + "utterance": "I am allergic to tomatoes but we have a lot of famous Italian restaurants here in London.",}, + + # System turn should not have mentions or pems + {"speaker": "SYSTEM", + "utterance": "Some people are allergic to histamine in tomatoes.",}, + + {"speaker": "USER", + "utterance": "Talking of food, can you recommend me a restaurant in my city for our anniversary?",}, + ] + + result = cel.annotate(example) + print_results(result) \ No newline at end of file From 9f086b5e47113c1f67e5bb46dd26c7d77cfc6467 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 13 Dec 2022 11:00:28 +0100 Subject: [PATCH 2/6] Update tests --- tool/requirements.txt | 3 +- tool/tests/test_crel.py | 107 +++++++++++++++++++++++++++++----------- 2 files changed, 80 insertions(+), 30 deletions(-) diff --git a/tool/requirements.txt b/tool/requirements.txt index 33ea94c..2fe7be9 100644 --- a/tool/requirements.txt +++ b/tool/requirements.txt @@ -1 +1,2 @@ -spacy \ No newline at end of file +spacy +pytest \ No newline at end of file diff --git a/tool/tests/test_crel.py b/tool/tests/test_crel.py index 9c2b36b..ce15802 100644 --- a/tool/tests/test_crel.py +++ b/tool/tests/test_crel.py @@ -1,20 +1,20 @@ import os +import pytest from crel.conv_el import ConvEL from pathlib import Path -os.environ["CUDA_VISIBLE_DEVICES"]="1" - +os.environ["CUDA_VISIBLE_DEVICES"] = "1" file_pretrained = Path("S:\\rel\\bert_conv-td") s2e_pe_model = Path("S:\\rel\\s2e_ast_onto") base_url = Path("S:\\rel\\rel_data") CONFIG = { - 'file_pretrained': str(file_pretrained), - 'base_url': str(base_url), - 's2e_pe_model': str(s2e_pe_model), - } + 'file_pretrained': str(file_pretrained), + 'base_url': str(base_url), + 's2e_pe_model': str(s2e_pe_model), +} def print_results(results): @@ -25,39 +25,88 @@ def print_results(results): print('\t', ann) -def test_conv1(): - cel = ConvEL(config=CONFIG) +@pytest.fixture +def cel(): + return ConvEL(config=CONFIG) + +def test_conv1(cel): example = [ - - {"speaker": "USER", - "utterance": "I think science fiction is an amazing genre for anything. Future science, technology, time travel, FTL travel, they're all such interesting concepts.",}, + { + "speaker": + "USER", + "utterance": + "I think science fiction is an amazing genre for anything. Future science, technology, time travel, FTL travel, they're all such interesting concepts.", + }, + { + "speaker": + "SYSTEM", + "utterance": + "Awesome! I really love how sci-fi storytellers focus on political/social/philosophical issues that would still be around even in the future. Makes them relatable.", + }, + { + "speaker": + "USER", + "utterance": + "I agree. One of my favorite forms of science fiction is anything related to time travel! I find it fascinating.", + }, + ] - # System turn should not have mentions or pems - {"speaker": "SYSTEM", - "utterance": "Awesome! I really love how sci-fi storytellers focus on political/social/philosophical issues that would still be around even in the future. Makes them relatable.",}, + result = cel.annotate(example) + assert isinstance(result, list) + + expected_annotations = [ + [[8, 15, 'science fiction', 'Science_fiction'], + [38, 5, 'genre', 'Genre_fiction'], + [74, 10, 'technology', 'Technology'], + [86, 11, 'time travel', 'Time_travel'], + [99, 10, 'FTL travel', 'Faster-than-light']], + [[37, 15, 'science fiction', 'Science_fiction'], + [76, 11, 'time travel', 'Time_travel'], + [16, 36, 'my favorite forms of science fiction', 'Time_travel']], + ] - {"speaker": "USER", - "utterance": "I agree. One of my favorite forms of science fiction is anything related to time travel! I find it fascinating.",}, + annotations = [ + res['annotations'] for res in result if res['speaker'] == 'USER' ] - result = cel.annotate(example) - print_results(result) + assert annotations == expected_annotations -def test_conv2(): - cel = ConvEL(config=CONFIG) +def test_conv2(cel): example = [ - {"speaker": "USER", - "utterance": "I am allergic to tomatoes but we have a lot of famous Italian restaurants here in London.",}, + { + "speaker": + "USER", + "utterance": + "I am allergic to tomatoes but we have a lot of famous Italian restaurants here in London.", + }, + { + "speaker": "SYSTEM", + "utterance": "Some people are allergic to histamine in tomatoes.", + }, + { + "speaker": + "USER", + "utterance": + "Talking of food, can you recommend me a restaurant in my city for our anniversary?", + }, + ] - # System turn should not have mentions or pems - {"speaker": "SYSTEM", - "utterance": "Some people are allergic to histamine in tomatoes.",}, + result = cel.annotate(example) + assert isinstance(result, list) - {"speaker": "USER", - "utterance": "Talking of food, can you recommend me a restaurant in my city for our anniversary?",}, + annotations = [ + res['annotations'] for res in result if res['speaker'] == 'USER' ] - result = cel.annotate(example) - print_results(result) \ No newline at end of file + expected_annotations = [ + [[17, 8, 'tomatoes', 'Tomato'], + [54, 19, 'Italian restaurants', 'Italian_cuisine'], + [82, 6, 'London', 'London']], + [[11, 4, 'food', 'Food'], + [40, 10, 'restaurant', 'Restaurant'], + [54, 7, 'my city', 'London']], + ] + + assert annotations == expected_annotations From 757a8e0184a3ec37935c8a5c5d64c14991b15575 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 13 Dec 2022 11:36:51 +0100 Subject: [PATCH 3/6] Reset notebook --- tool/conversational_entity_linking.ipynb | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/tool/conversational_entity_linking.ipynb b/tool/conversational_entity_linking.ipynb index 2ff77c2..992dedb 100644 --- a/tool/conversational_entity_linking.ipynb +++ b/tool/conversational_entity_linking.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -12,21 +12,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "AssertionError", - "evalue": "PE Linking model folder not found. Please download the model file following the instructions in the README.md.", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mAssertionError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn [3], line 4\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# Confirm the existence of the models\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m# assert os.path.exists('./bert_conv-td'), 'MD model file not found. Please download the model file following the instructions in the README.md.'\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# assert os.path.exists('./rel_conv_project_folder'), 'ED model file not found. Please download the model file following the instructions in the README.md.'\u001b[39;00m\n\u001b[1;32m----> 4\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mexists(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m./s2e_pe/model/s2e_ast_onto\u001b[39m\u001b[38;5;124m'\u001b[39m), \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPE Linking model folder not found. Please download the model file following the instructions in the README.md.\u001b[39m\u001b[38;5;124m'\u001b[39m\n", - "\u001b[1;31mAssertionError\u001b[0m: PE Linking model folder not found. Please download the model file following the instructions in the README.md." - ] - } - ], + "outputs": [], "source": [ "# Confirm the existence of the models\n", "assert os.path.exists('./bert_conv-td'), 'MD model file not found. Please download the model file following the instructions in the README.md.'\n", From 31a3131afbc5e46966c61013eeb2322a4ccd9063 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 13 Dec 2022 11:41:56 +0100 Subject: [PATCH 4/6] Use src layout --- tool/setup.cfg | 20 +++++++++++++++++++ tool/{ => src}/crel/__init__.py | 0 tool/{ => src}/crel/bert_md.py | 0 tool/{ => src}/crel/conv_el.py | 0 tool/{ => src}/crel/rel_ed.py | 0 tool/{ => src}/crel/s2e_pe/__init__.py | 0 tool/{ => src}/crel/s2e_pe/consts.py | 0 .../crel/s2e_pe/coref_bucket_batch_sampler.py | 0 tool/{ => src}/crel/s2e_pe/data.py | 0 tool/{ => src}/crel/s2e_pe/modeling.py | 0 tool/{ => src}/crel/s2e_pe/pe.py | 0 tool/{ => src}/crel/s2e_pe/pe_data.py | 0 tool/{ => src}/crel/s2e_pe/utils.py | 0 13 files changed, 20 insertions(+) rename tool/{ => src}/crel/__init__.py (100%) rename tool/{ => src}/crel/bert_md.py (100%) rename tool/{ => src}/crel/conv_el.py (100%) rename tool/{ => src}/crel/rel_ed.py (100%) rename tool/{ => src}/crel/s2e_pe/__init__.py (100%) rename tool/{ => src}/crel/s2e_pe/consts.py (100%) rename tool/{ => src}/crel/s2e_pe/coref_bucket_batch_sampler.py (100%) rename tool/{ => src}/crel/s2e_pe/data.py (100%) rename tool/{ => src}/crel/s2e_pe/modeling.py (100%) rename tool/{ => src}/crel/s2e_pe/pe.py (100%) rename tool/{ => src}/crel/s2e_pe/pe_data.py (100%) rename tool/{ => src}/crel/s2e_pe/utils.py (100%) diff --git a/tool/setup.cfg b/tool/setup.cfg index 2d362d3..fea8aef 100644 --- a/tool/setup.cfg +++ b/tool/setup.cfg @@ -1,2 +1,22 @@ [metadata] name = crel + +[options] +zip_safe = False +packages = find_namespace: +package_dir = + = src +include_package_data = True +install_requires = + radboud-el + spacy + +[options.extras_require] +develop = + pytest + +[options.packages.find] +where = src + +[tool:pytest] +testpaths = tests diff --git a/tool/crel/__init__.py b/tool/src/crel/__init__.py similarity index 100% rename from tool/crel/__init__.py rename to tool/src/crel/__init__.py diff --git a/tool/crel/bert_md.py b/tool/src/crel/bert_md.py similarity index 100% rename from tool/crel/bert_md.py rename to tool/src/crel/bert_md.py diff --git a/tool/crel/conv_el.py b/tool/src/crel/conv_el.py similarity index 100% rename from tool/crel/conv_el.py rename to tool/src/crel/conv_el.py diff --git a/tool/crel/rel_ed.py b/tool/src/crel/rel_ed.py similarity index 100% rename from tool/crel/rel_ed.py rename to tool/src/crel/rel_ed.py diff --git a/tool/crel/s2e_pe/__init__.py b/tool/src/crel/s2e_pe/__init__.py similarity index 100% rename from tool/crel/s2e_pe/__init__.py rename to tool/src/crel/s2e_pe/__init__.py diff --git a/tool/crel/s2e_pe/consts.py b/tool/src/crel/s2e_pe/consts.py similarity index 100% rename from tool/crel/s2e_pe/consts.py rename to tool/src/crel/s2e_pe/consts.py diff --git a/tool/crel/s2e_pe/coref_bucket_batch_sampler.py b/tool/src/crel/s2e_pe/coref_bucket_batch_sampler.py similarity index 100% rename from tool/crel/s2e_pe/coref_bucket_batch_sampler.py rename to tool/src/crel/s2e_pe/coref_bucket_batch_sampler.py diff --git a/tool/crel/s2e_pe/data.py b/tool/src/crel/s2e_pe/data.py similarity index 100% rename from tool/crel/s2e_pe/data.py rename to tool/src/crel/s2e_pe/data.py diff --git a/tool/crel/s2e_pe/modeling.py b/tool/src/crel/s2e_pe/modeling.py similarity index 100% rename from tool/crel/s2e_pe/modeling.py rename to tool/src/crel/s2e_pe/modeling.py diff --git a/tool/crel/s2e_pe/pe.py b/tool/src/crel/s2e_pe/pe.py similarity index 100% rename from tool/crel/s2e_pe/pe.py rename to tool/src/crel/s2e_pe/pe.py diff --git a/tool/crel/s2e_pe/pe_data.py b/tool/src/crel/s2e_pe/pe_data.py similarity index 100% rename from tool/crel/s2e_pe/pe_data.py rename to tool/src/crel/s2e_pe/pe_data.py diff --git a/tool/crel/s2e_pe/utils.py b/tool/src/crel/s2e_pe/utils.py similarity index 100% rename from tool/crel/s2e_pe/utils.py rename to tool/src/crel/s2e_pe/utils.py From 6c656d76e099d59e073005f8b52a22d99e2805cb Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 13 Dec 2022 11:42:05 +0100 Subject: [PATCH 5/6] Remove submodule --- .gitmodules | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 .gitmodules diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index b1ee28a..0000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "tool/REL"] - path = tool/REL - url = https://github.com/informagi/REL From 1d7c2eb837dc55df5cddc6906012c2146d901933 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 13 Dec 2022 11:48:23 +0100 Subject: [PATCH 6/6] Rename directory --- {tool => crel}/README.md | 0 {tool => crel}/conversational_entity_linking.ipynb | 0 {tool => crel}/setup.cfg | 0 {tool => crel}/setup.py | 0 {tool => crel}/src/crel/__init__.py | 0 {tool => crel}/src/crel/bert_md.py | 0 {tool => crel}/src/crel/conv_el.py | 0 {tool => crel}/src/crel/rel_ed.py | 0 {tool => crel}/src/crel/s2e_pe/__init__.py | 0 {tool => crel}/src/crel/s2e_pe/consts.py | 0 {tool => crel}/src/crel/s2e_pe/coref_bucket_batch_sampler.py | 0 {tool => crel}/src/crel/s2e_pe/data.py | 0 {tool => crel}/src/crel/s2e_pe/modeling.py | 0 {tool => crel}/src/crel/s2e_pe/pe.py | 0 {tool => crel}/src/crel/s2e_pe/pe_data.py | 0 {tool => crel}/src/crel/s2e_pe/utils.py | 0 {tool => crel}/tests/test_crel.py | 0 17 files changed, 0 insertions(+), 0 deletions(-) rename {tool => crel}/README.md (100%) rename {tool => crel}/conversational_entity_linking.ipynb (100%) rename {tool => crel}/setup.cfg (100%) rename {tool => crel}/setup.py (100%) rename {tool => crel}/src/crel/__init__.py (100%) rename {tool => crel}/src/crel/bert_md.py (100%) rename {tool => crel}/src/crel/conv_el.py (100%) rename {tool => crel}/src/crel/rel_ed.py (100%) rename {tool => crel}/src/crel/s2e_pe/__init__.py (100%) rename {tool => crel}/src/crel/s2e_pe/consts.py (100%) rename {tool => crel}/src/crel/s2e_pe/coref_bucket_batch_sampler.py (100%) rename {tool => crel}/src/crel/s2e_pe/data.py (100%) rename {tool => crel}/src/crel/s2e_pe/modeling.py (100%) rename {tool => crel}/src/crel/s2e_pe/pe.py (100%) rename {tool => crel}/src/crel/s2e_pe/pe_data.py (100%) rename {tool => crel}/src/crel/s2e_pe/utils.py (100%) rename {tool => crel}/tests/test_crel.py (100%) diff --git a/tool/README.md b/crel/README.md similarity index 100% rename from tool/README.md rename to crel/README.md diff --git a/tool/conversational_entity_linking.ipynb b/crel/conversational_entity_linking.ipynb similarity index 100% rename from tool/conversational_entity_linking.ipynb rename to crel/conversational_entity_linking.ipynb diff --git a/tool/setup.cfg b/crel/setup.cfg similarity index 100% rename from tool/setup.cfg rename to crel/setup.cfg diff --git a/tool/setup.py b/crel/setup.py similarity index 100% rename from tool/setup.py rename to crel/setup.py diff --git a/tool/src/crel/__init__.py b/crel/src/crel/__init__.py similarity index 100% rename from tool/src/crel/__init__.py rename to crel/src/crel/__init__.py diff --git a/tool/src/crel/bert_md.py b/crel/src/crel/bert_md.py similarity index 100% rename from tool/src/crel/bert_md.py rename to crel/src/crel/bert_md.py diff --git a/tool/src/crel/conv_el.py b/crel/src/crel/conv_el.py similarity index 100% rename from tool/src/crel/conv_el.py rename to crel/src/crel/conv_el.py diff --git a/tool/src/crel/rel_ed.py b/crel/src/crel/rel_ed.py similarity index 100% rename from tool/src/crel/rel_ed.py rename to crel/src/crel/rel_ed.py diff --git a/tool/src/crel/s2e_pe/__init__.py b/crel/src/crel/s2e_pe/__init__.py similarity index 100% rename from tool/src/crel/s2e_pe/__init__.py rename to crel/src/crel/s2e_pe/__init__.py diff --git a/tool/src/crel/s2e_pe/consts.py b/crel/src/crel/s2e_pe/consts.py similarity index 100% rename from tool/src/crel/s2e_pe/consts.py rename to crel/src/crel/s2e_pe/consts.py diff --git a/tool/src/crel/s2e_pe/coref_bucket_batch_sampler.py b/crel/src/crel/s2e_pe/coref_bucket_batch_sampler.py similarity index 100% rename from tool/src/crel/s2e_pe/coref_bucket_batch_sampler.py rename to crel/src/crel/s2e_pe/coref_bucket_batch_sampler.py diff --git a/tool/src/crel/s2e_pe/data.py b/crel/src/crel/s2e_pe/data.py similarity index 100% rename from tool/src/crel/s2e_pe/data.py rename to crel/src/crel/s2e_pe/data.py diff --git a/tool/src/crel/s2e_pe/modeling.py b/crel/src/crel/s2e_pe/modeling.py similarity index 100% rename from tool/src/crel/s2e_pe/modeling.py rename to crel/src/crel/s2e_pe/modeling.py diff --git a/tool/src/crel/s2e_pe/pe.py b/crel/src/crel/s2e_pe/pe.py similarity index 100% rename from tool/src/crel/s2e_pe/pe.py rename to crel/src/crel/s2e_pe/pe.py diff --git a/tool/src/crel/s2e_pe/pe_data.py b/crel/src/crel/s2e_pe/pe_data.py similarity index 100% rename from tool/src/crel/s2e_pe/pe_data.py rename to crel/src/crel/s2e_pe/pe_data.py diff --git a/tool/src/crel/s2e_pe/utils.py b/crel/src/crel/s2e_pe/utils.py similarity index 100% rename from tool/src/crel/s2e_pe/utils.py rename to crel/src/crel/s2e_pe/utils.py diff --git a/tool/tests/test_crel.py b/crel/tests/test_crel.py similarity index 100% rename from tool/tests/test_crel.py rename to crel/tests/test_crel.py