Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add architectures where the transformer is used only for embeddings #163

Draft
wants to merge 26 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
d238ac5
add BERT+BidLSTM and BERT+BidLSTM+CRF base models
lfoppiano Jul 12, 2023
f634d24
typo
lfoppiano Jul 12, 2023
19493c5
Fix LSTM size, add Dense layer for ChainCRF
lfoppiano Jul 12, 2023
fcb764a
add crf related flags in configuration
lfoppiano Jul 12, 2023
09af3d1
Merge branch 'master' into bert-bidlstm-models
lfoppiano Jul 13, 2023
536e0de
reduce the size of the LSTM to try avoiding OOM
lfoppiano Jul 13, 2023
24dcfed
Merge branch 'master' into bert-bidlstm-models
lfoppiano Jul 18, 2023
78c8054
freeze bert and concatenate embeddings
lfoppiano Aug 9, 2023
3cd4810
freeze bert layer
lfoppiano Aug 9, 2023
40f8648
add frozen bert to bert_lstm_crf
lfoppiano Aug 10, 2023
86cddbf
update bert_bidlstm_chaincrf
lfoppiano Aug 10, 2023
f12b8b3
update tensorflow addons
lfoppiano Aug 10, 2023
33eed9b
reverted wrong committed file
lfoppiano Aug 10, 2023
22240ad
hacky solution for selecting the default config values
lfoppiano Aug 11, 2023
51a0d56
fix the learning rate for the hacky solution
lfoppiano Aug 11, 2023
359446c
add examples superconductors
lfoppiano Aug 11, 2023
3e07c97
use the same method everywhere to know if a model is using transformers
lfoppiano Aug 11, 2023
fb509d1
tag startswith
lfoppiano Aug 11, 2023
679b9a4
remove unused fields
lfoppiano Aug 15, 2023
8976db3
add character embedding channel
lfoppiano Aug 15, 2023
5e95960
LSTM output the same size of a single embedding
lfoppiano Aug 16, 2023
87144fc
revert change
lfoppiano Aug 16, 2023
9ecc400
update
lfoppiano Aug 18, 2023
3550c5b
cleanup
lfoppiano Aug 22, 2023
43318b5
remove chain embedding channel temporarily for ChainCRF
lfoppiano Aug 22, 2023
4a546b4
typo
lfoppiano Aug 22, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion delft/applications/datasetTagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from delft.sequenceLabelling import Sequence
from delft.sequenceLabelling.reader import load_data_and_labels_json_offsets
from delft.sequenceLabelling.wrapper import is_transformer_architecture
from delft.utilities.misc import parse_number_ranges

def configure(architecture, output_path=None, max_sequence_length=-1, batch_size=-1, embeddings_name=None,
Expand All @@ -20,7 +21,7 @@ def configure(architecture, output_path=None, max_sequence_length=-1, batch_size
multiprocessing = True
early_stop = True

if "BERT" in architecture:
if is_transformer_architecture(architecture):
# architectures with some transformer layer/embeddings inside
if batch_size == -1:
#default
Expand Down
25 changes: 23 additions & 2 deletions delft/applications/grobidTagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,26 @@

from delft.sequenceLabelling import Sequence
from delft.sequenceLabelling.reader import load_data_and_labels_crf_file
from delft.sequenceLabelling.wrapper import is_transformer_architecture
from delft.utilities.Utilities import longest_row

MODEL_LIST = ['affiliation-address', 'citation', 'date', 'header', 'name-citation', 'name-header', 'software', 'figure', 'table', 'reference-segmenter']


# config = {
# "architectures": {
# "BERT.*":
# {
# "citation": {
# "max_sequence_length": 200,
# "batch_size": 20
# }
# },
# "BERT_BidLSTM.*": {
#
# }
# }
# }

def configure(model, architecture, output_path=None, max_sequence_length=-1, batch_size=-1,
embeddings_name=None, max_epoch=-1, use_ELMo=False, patience=-1):
"""
Expand All @@ -26,7 +41,7 @@ def configure(model, architecture, output_path=None, max_sequence_length=-1, bat
multiprocessing = True
early_stop = True

if architecture and "BERT" in architecture:
if is_transformer_architecture(architecture):
# architectures with some transformer layer/embeddings inside

# non-default settings per model
Expand Down Expand Up @@ -352,7 +367,9 @@ class Tasks:
word_embeddings_examples = ['glove-840B', 'fasttext-crawl', 'word2vec']

architectures_transformers_based = [
'BERT', 'BERT_FEATURES', 'BERT_CRF', 'BERT_ChainCRF', 'BERT_CRF_FEATURES', 'BERT_ChainCRF_FEATURES', 'BERT_CRF_CHAR', 'BERT_CRF_CHAR_FEATURES'
'BERT', 'BERT_FEATURES', 'BERT_CRF', 'BERT_ChainCRF', 'BERT_CRF_FEATURES', 'BERT_ChainCRF_FEATURES',
'BERT_CRF_CHAR', 'BERT_CRF_CHAR_FEATURES',
'BERT_BidLSTM', 'BERT_BidLSTM_CRF', 'BERT_BidLSTM_ChainCRF'
]

architectures = architectures_word_embeddings + architectures_transformers_based
Expand Down Expand Up @@ -485,6 +502,10 @@ class Tasks:
someTexts.append("Wilcoxon signed-ranks tests were performed to calculate statistical significance of comparisons between alignment programs, which include ProbCons (version 1.10) (23), MAFFT (version 5.667) (11) with several options, MUSCLE (version 3.52) (10) and ClustalW (version 1.83) (7).")
someTexts.append("All statistical analyses were done using computer software Prism 6 for Windows (version 6.02; GraphPad Software, San Diego, CA, USA). One-Way ANOVA was used to detect differences amongst the groups. To account for the non-normal distribution of the data, all data were sorted by rank status prior to ANOVA statistical analysis. ")
someTexts.append("The statistical analysis was performed using IBM SPSS Statistics v. 20 (SPSS Inc, 2003, Chicago, USA).")
elif model.startswith('superconductors'):
someTexts.append("We are studying the material La 3 A 2 Ge 2 (A = Ir, Rh). The critical temperature T C = 4.7 K discovered for La 3 Ir 2 Ge 2 in this work is by about 1.2 K higher than that found for La 3 Rh 2 Ge 2.")
someTexts.append("In just a few months, the superconducting transition temperature (Tc) was increased to 55 K in the electron-doped system, as well as 25 K in hole-doped La1−x SrxOFeAs compound. Soon after, single crystals of LnFeAs(O1−x Fx) (Ln = Pr, Nd, Sm) were grown successfully by the NaCl/KCl flux method, though the sub-millimeter sizes limit the experimental studies on them. Therefore, FeAs-based single crystals with high crystalline quality, homogeneity and large sizes are highly desired for precise measurements of the properties. Very recently, the BaFe2As2 compound in a tetragonal ThCr2Si2-type structure with infinite Fe–As layers was reported. By replacing the alkaline earth elements (Ba and Sr) with alkali elements (Na, K, and Cs), superconductivity up to 38 K was discovered both in hole-doped and electron-doped samples. Tc leties from 2.7 K in CsFe2As2 to 38 K in A1−xKxFe2As2 (A = Ba, Sr). Meanwhile, superconductivity could also be induced in the parent phase by high pressure or by replacing some of the Fe by Co. More excitingly, large single crystals could be obtained by the Sn flux method in this family to study the rather low melting temperature and the intermetallic characteristics.")


if architecture.find("FEATURE") == -1:
result = annotate_text(someTexts, model, "json", architecture=architecture, use_ELMo=use_ELMo)
Expand Down
Loading