Skip to content

Commit

Permalink
Updated config for TransformersModelForTokenClassificationNerStep. Removed obsolete multilabel.yaml
Browse files Browse the repository at this point in the history
  • Loading branch information
RichJackson authored and paluchasz committed Sep 13, 2024
1 parent c1c2ea2 commit df54a2c
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ path: ${oc.env:KAZU_MODEL_PACK}/tinybern
batch_size: 4
stride: 16
max_sequence_length: 128
# BERT for token classification doesn't use token_type_ids, so only
# input_ids and attention_mask are listed here.
keys_to_use:
- input_ids
- attention_mask
entity_splitter:
_target_: kazu.steps.ner.entity_post_processing.NonContiguousEntitySplitter
entity_conditions:
Expand All @@ -13,21 +16,22 @@ entity_splitter:
disease:
- _target_: kazu.steps.ner.entity_post_processing.SplitOnConjunctionPattern
path: ${SciSpacyPipeline.path}
detect_subspans: False
threshold: ~
labels:
- 'B-cell_line'
- 'B-cell_type'
- 'B-disease'
- 'B-drug'
- 'B-gene'
- 'B-species'
- 'I-cell_line'
- 'I-cell_type'
- 'I-disease'
- 'I-drug'
- 'I-gene'
- 'I-species'
- 'O'
strip_re:
gene: "( (gene|protein)s?)+$"
# Configuration for the tokenized word processor; `_target_` names the class
# kazu.steps.ner.tokenized_word_processor.TokenizedWordProcessor.
# NOTE(review): indentation is lost in this view, so the exact nesting of the
# keys below under this mapping should be confirmed against the real file.
tokenized_word_processor:
_target_: kazu.steps.ner.tokenized_word_processor.TokenizedWordProcessor
# Label set: a 'B-' and an 'I-' prefixed label for each of cell_line,
# cell_type, disease, drug, gene and species, plus 'O'.
# NOTE(review): presumably the order must match the model's output label
# indices - confirm before reordering.
labels:
- 'B-cell_line'
- 'B-cell_type'
- 'B-disease'
- 'B-drug'
- 'B-gene'
- 'B-species'
- 'I-cell_line'
- 'I-cell_type'
- 'I-disease'
- 'I-drug'
- 'I-gene'
- 'I-species'
- 'O'
# Per-class regex. The gene pattern matches one or more trailing
# " gene" / " protein" words (optionally plural) at the end of the text.
# NOTE(review): presumably the matched suffix is stripped from gene entities -
# verify against TokenizedWordProcessor.
strip_re:
gene: "( (gene|protein)s?)+$"
# Multilabel mode is switched off.
use_multilabel: false

This file was deleted.

0 comments on commit df54a2c

Please sign in to comment.