Skip to content

Commit

Permalink
Merge pull request #32 from wellcometrust/new-release
Browse files Browse the repository at this point in the history
Preparing for new release
  • Loading branch information
lizgzil authored Apr 23, 2020
2 parents bbca141 + b51452c commit b301d12
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 6 deletions.
18 changes: 14 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
.DEFAULT_GOAL := all

# Detect the host platform from the kernel name reported by `uname -s`
# (approach taken from https://gist.github.com/sighingnow/deee806603ec9274fd47).
# OSFLAG becomes "linux" or "macosx"; for any other kernel name it is left
# unset. UNAME_S is assigned with := so the shell command runs only once.
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
OSFLAG := linux
else ifeq ($(UNAME_S),Darwin)
OSFLAG := macosx
endif

#
# Set file and version for embeddings and model, plus local paths
#
Expand Down Expand Up @@ -84,9 +93,9 @@ datasets = data/splitting/2019.12.0_splitting_train.tsv \
data/parsing/2020.3.2_parsing_train.tsv \
data/parsing/2020.3.2_parsing_test.tsv \
data/parsing/2020.3.2_parsing_valid.tsv \
data/multitask/2020.3.19_multitask_train.tsv \
data/multitask/2020.3.19_multitask_test.tsv \
data/multitask/2020.3.19_multitask_valid.tsv
data/multitask/2020.3.18_multitask_train.tsv \
data/multitask/2020.3.18_multitask_test.tsv \
data/multitask/2020.3.18_multitask_valid.tsv


rodrigues_datasets = data/rodrigues/clean_train.txt \
Expand Down Expand Up @@ -121,9 +130,10 @@ sync_model_to_s3:
# artefacts otherwise they can make a mess of your build! Public access to
# the wheel is granted with the --acl public-read flag.


.PHONY: dist
dist:
-rm build/bin build/bdist.linux-x86_64 -r
-rm build/lib build/bin build/bdist.$(OSFLAG)* -r
-rm deep_reference_parser-20* -r
-rm deep_reference_parser.egg-info -r
-rm dist/*
Expand Down
4 changes: 2 additions & 2 deletions deep_reference_parser/__version__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# Package metadata for deep_reference_parser, expressed as module-level
# dunder constants.
# NOTE(review): assigning __name__ here rebinds this module's own __name__.
__name__ = "deep_reference_parser"
# NOTE(review): __version__ is assigned twice in this view (these look like the
# removed and added sides of the diff); the later value, "2020.4.5", wins.
__version__ = "2020.3.3"
__version__ = "2020.4.5"
__description__ = "Deep learning model for finding and parsing references"
__url__ = "https://github.com/wellcometrust/deep_reference_parser"
__author__ = "Wellcome Trust DataLabs Team"
# NOTE(review): the address below looks redacted by the page it was copied
# from; confirm the real contact address before relying on it.
__author_email__ = "[email protected]"
__license__ = "MIT"
# Version identifiers of the splitter, parser, and split-parser models.
__splitter_model_version__ = "2020.3.6_splitting"
__parser_model_version__ = "2020.3.8_parsing"
# NOTE(review): assigned twice as well; the later value, "2020.4.5_multitask",
# is the one that takes effect.
__splitparser_model_version__ = "2020.3.19_multitask"
__splitparser_model_version__ = "2020.4.5_multitask"
36 changes: 36 additions & 0 deletions deep_reference_parser/configs/2020.4.5_multitask.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Configuration for the 2020.4.5_multitask model version.
# NOTE(review): if this file is read with Python's configparser, keys under
# [DEFAULT] are also visible from every other section - confirm the loader.
[DEFAULT]
version = 2020.4.5_multitask
description = Uses 2020.3.18 data
# NOTE(review): looks like an abbreviated git commit hash - confirm.
deep_reference_parser_version = 9432b6e

[data]
# Note that test and valid proportion are only used for data creation steps,
# not when running the train command.
test_proportion = 0.25
valid_proportion = 0.25
data_path = data/
respect_line_endings = 0
respect_doc_endings = 1
line_limit = 150
# The train, test, and valid files all point at the 2020.3.18 multitask data,
# matching the description in [DEFAULT].
policy_train = data/processed/annotated/deep_reference_parser/multitask/2020.3.18_multitask_train.tsv
policy_test = data/processed/annotated/deep_reference_parser/multitask/2020.3.18_multitask_test.tsv
policy_valid = data/processed/annotated/deep_reference_parser/multitask/2020.3.18_multitask_valid.tsv
s3_slug = https://datalabs-public.s3.eu-west-2.amazonaws.com/deep_reference_parser/

[build]
# The final directory of output_path matches the version declared in [DEFAULT].
output_path = data/models/multitask/2020.4.5_multitask/
output = crf
word_embeddings = embeddings/2020.1.1-wellcome-embeddings-300.txt
pretrained_embedding = 0
dropout = 0.5
lstm_hidden = 400
# word_embedding_size (300) agrees with the "-300" suffix of the
# word_embeddings file name above.
word_embedding_size = 300
char_embedding_size = 100
char_embedding_type = BILSTM
optimizer = adam

[train]
epochs = 60
batch_size = 100
early_stopping_patience = 5
# NOTE(review): presumably the metric watched for early stopping - confirm
# against the training code.
metric = val_f1

0 comments on commit b301d12

Please sign in to comment.