Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Depend on main REL and get tests to pass [do not merge] #1

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 134 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a Python script from a template
# before PyInstaller builds the exe, so as to inject the date and other information into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
coverage.json
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, when collaborating, if there are platform-specific dependencies or dependencies
# without cross-platform support, pipenv may install dependencies that don't work, or fail
# to install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
pyvenv.cfg

# Spyder project settings
.spyderproject
.spyproject

# vscode
.vscode/

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
3 changes: 0 additions & 3 deletions .gitmodules

This file was deleted.

File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@
"hash": "a53556614ea0d196de1dd499c6cd4b1019f00d4a13a34e20ba99029df2a473df"
},
"kernelspec": {
"display_name": "Python 3.8.13 ('220429_conel22_github')",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -126,9 +126,8 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
},
"orig_nbformat": 4
"version": "3.9.15"
}
},
"nbformat": 4,
"nbformat_minor": 2
Expand Down
22 changes: 22 additions & 0 deletions crel/setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[metadata]
name = crel

[options]
zip_safe = False
packages = find_namespace:
package_dir =
= src
include_package_data = True
install_requires =
radboud-el
spacy

[options.extras_require]
develop =
pytest

[options.packages.find]
where = src

[tool:pytest]
testpaths = tests
2 changes: 2 additions & 0 deletions crel/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from setuptools import setup
setup()
Empty file added crel/src/crel/__init__.py
Empty file.
File renamed without changes.
32 changes: 20 additions & 12 deletions tool/conv_el.py → crel/src/crel/conv_el.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
import sys
sys.path.append('s2e_pe')
import pe_data
from .s2e_pe import pe_data
import importlib
from bert_md import BERT_MD
from rel_ed import REL_ED
from pe import EEMD, PEMD
from .bert_md import BERT_MD
from .rel_ed import REL_ED
from .s2e_pe.pe import EEMD, PEMD

class ConvEL():
def __init__(self, threshold=0):
def __init__(self, threshold=0, config=None):
self.threshold = threshold

conf = self.ConfigConvEL()
if not config:
config = {}

conf = self.ConfigConvEL(**config)

self.bert_md = BERT_MD(conf.file_pretrained)
self.rel_ed = REL_ED(conf.base_url, conf.wiki_version)
self.eemd = EEMD()
self.eemd = EEMD(config=config)
self.pemd = PEMD()

self.preprocess = pe_data.PreProcess()
Expand All @@ -24,13 +27,18 @@ def __init__(self, threshold=0):
self.ment2ent = {} # This will be used for PE Linking

class ConfigConvEL():
def __init__(self):
def __init__(self,
file_pretrained = './bert_conv-td',
base_url = './rel_conv_project_folder',
wiki_version='wiki_2019',
**kwargs
):
# MD
self.file_pretrained = './bert_conv-td'
self.file_pretrained = file_pretrained

# ED
self.base_url = './rel_conv_project_folder'
self.wiki_version = "wiki_2019"
self.base_url = base_url
self.wiki_version = wiki_version

# NOTE: PE Config is in EEMD class

Expand Down
1 change: 0 additions & 1 deletion tool/rel_ed.py → crel/src/crel/rel_ed.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import sys
sys.path.append('./REL')
from REL.entity_disambiguation import EntityDisambiguation
from REL.utils import process_results
from REL.mention_detection import MentionDetection
Expand Down
Empty file.
File renamed without changes.
4 changes: 2 additions & 2 deletions tool/s2e_pe/data.py → crel/src/crel/s2e_pe/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@

import torch

from consts import SPEAKER_START, SPEAKER_END, NULL_ID_FOR_COREF
from utils import flatten_list_of_lists
from .consts import SPEAKER_START, SPEAKER_END, NULL_ID_FOR_COREF
from .utils import flatten_list_of_lists
from torch.utils.data import Dataset

CorefExample = namedtuple("CorefExample", ["token_ids", "clusters"])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from transformers.modeling_bert import ACT2FN
except: # If you use `jupyterlab-debugger`
from transformers.models.bert.modeling_bert import ACT2FN
from utils import extract_clusters, extract_mentions_to_predicted_clusters_from_clusters, mask_tensor #, ce_get_start_end_subtoken_num
from .utils import extract_clusters, extract_mentions_to_predicted_clusters_from_clusters, mask_tensor #, ce_get_start_end_subtoken_num
import os
import json

Expand Down
22 changes: 14 additions & 8 deletions tool/s2e_pe/pe.py → crel/src/crel/s2e_pe/pe.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@
spacy.cli.download("en_core_web_md")
import en_core_web_md
nlp = en_core_web_md.load()
from pe_data import PreProcess # to use get_span()
from .pe_data import PreProcess # to use get_span()

# EEMD
import data
from . import data
import torch
from transformers import AutoConfig, AutoTokenizer, LongformerConfig
from modeling import S2E
from coref_bucket_batch_sampler import BucketBatchSampler
from .modeling import S2E
from .coref_bucket_batch_sampler import BucketBatchSampler


class PEMD():
Expand Down Expand Up @@ -119,16 +119,22 @@ class EEMD():
"""Find corresponding explicit entity mention using s2e-coref-based method
"""

def __init__(self):
self.conf = self.Config()
def __init__(self, config=None):
if not config:
config = {}

self.conf = self.Config(**config)
self.model = self._read_model()

class Config():
"""Inner class for config
"""
def __init__(self):
def __init__(self,
s2e_pe_model='./s2e_pe/model/s2e_ast_onto',
**kwargs
):
self.max_seq_length = 4096
self.model_name_or_path = './s2e_pe/model/s2e_ast_onto'
self.model_name_or_path = s2e_pe_model
self.max_total_seq_len = 4096
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# self.device = torch.device("cpu") # TMP
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion tool/s2e_pe/utils.py → crel/src/crel/s2e_pe/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import torch
import numpy as np

from consts import NULL_ID_FOR_COREF
from .consts import NULL_ID_FOR_COREF


def flatten_list_of_lists(lst):
Expand Down
Loading