Skip to content

Commit

Permalink
Update FlairNLP to 0.11 (#9)
Browse files Browse the repository at this point in the history
* Remove tmp dir when failing or finishing

* Add logging info about GPU usage

* Update FlairNLP to 0.11
  • Loading branch information
ZJaume authored Nov 28, 2022
1 parent dfccb15 commit c8e63c3
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 9 deletions.
8 changes: 4 additions & 4 deletions biner.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,8 +245,8 @@ def entities2text(sentence, entities):
start = ent.span()[0]
end = ent.span()[1]
else:
start = ent.start_pos
end = ent.end_pos
start = ent.start_position
end = ent.end_position
n_entities += 1

if start < cur: # If two overlap, skip the second one
Expand Down Expand Up @@ -275,13 +275,13 @@ def get_entities_block(sentence_block, ner=True):
# Append the entities found by nlp to the entities found by regex
# only keep entities that we are interested in
for entities, sent_obj in zip(entities_block, sent_obj_block):
entities += [s for s in sent_obj.get_spans() if s.tag in ENTITIES]
entities += [s for s in sent_obj.get_spans('ner') if s.tag in ENTITIES]

# Sort each entity list separately
# only needed if ner is enabled
for entities in entities_block:
# sort the objects by their (start, end) positions in sentence
entities.sort(key=lambda x: x.span() if type(x) is re.Match else (x.start_pos, x.end_pos))
entities.sort(key=lambda x: x.span() if type(x) is re.Match else (x.start_position, x.end_position))

return entities_block

Expand Down
15 changes: 12 additions & 3 deletions biroamer
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ FASTALIGN=fast_align
ATOOLS=atools
export NER=biner
BUILDTMX=buildtmx
NODELTEMP=false

JOBS=$(getconf _NPROCESSORS_ONLN)
BLOCKSIZE=10000
Expand Down Expand Up @@ -70,11 +71,12 @@ usage () {
echo " -o Enable random omitting of sentences"
echo " -t TOKL1 External tokenizer command for lang1"
echo " -T TOKL2 External tokenizer command for lang2"
echo " -p Do not delete temporary directory"
echo " -h Shows this message"
}

# Read optional arguments
while getopts ":s:a:j:b:m:t:T:ho" options
while getopts ":s:a:j:b:m:t:T:pho" options
do
case "${options}" in
s) SEED=$OPTARG;;
Expand All @@ -83,6 +85,7 @@ do
b) BLOCKSIZE=$OPTARG;;
m) MIX_CORPUS=$OPTARG;;
o) OMIT=true;;
p) NODELTEMP=true;;
t) TOKL1=$OPTARG;;
T) TOKL2=$OPTARG;;
h) usage
Expand Down Expand Up @@ -113,18 +116,26 @@ if which nvidia-smi >/dev/null; then
if [ -z ${CUDA_VISIBLE_DEVICES+x} ]; then
# For undefined variable use all GPUs
NER_JOBS=$(nvidia-smi -L | wc -l)
echo Using all $NER_JOBS GPUs >&2
elif [[ -z "$CUDA_VISIBLE_DEVICES" ]]; then
# For an empty variable, GPUs are disabled: use as many jobs as CPUs
NER_JOBS=$JOBS
echo Disabled GPUs, using $NER_JOBS CPUs >&2
else
# Variable is defined and not empty: count the listed devices
NER_JOBS=$(echo ${CUDA_VISIBLE_DEVICES//,/ } | wc -w)
echo Using $NER_JOBS GPUs >&2
fi
else
NER_JOBS=$JOBS
echo No GPUs available, using $NER_JOBS CPUs >&2
fi

MYTEMPDIR=$(mktemp -d)
if [ "$NODELTEMP" = false ]; then
# Remove temporary dir when script fails or finishes
trap "rm -Rf $MYTEMPDIR" EXIT
fi
echo "Using temporary directory $MYTEMPDIR" 1>&2

# Extract from TMX, omit, mix and shuffle
Expand Down Expand Up @@ -175,5 +186,3 @@ paste $MYTEMPDIR/omitted-mixed $MYTEMPDIR/f1.tok $MYTEMPDIR/f2.tok $MYTEMPDIR/sy
| $BUILDTMX $L1 $L2

echo "Removing temporary directory $MYTEMPDIR" 1>&2

rm -Rf $MYTEMPDIR
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
docopt==0.6.2
nltk>=3.4.5
flair==0.9
nltk>=3.4.5,<4
flair==0.11
gensim<5.0

0 comments on commit c8e63c3

Please sign in to comment.