Removes the redundant label_map I/O operations #79

Open · wants to merge 1 commit into base: master
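
For context, the behavior this PR removes is sketched below: convert_single_example rebuilt the label-to-id dictionary from label_list and re-pickled it to middle_output/label2id.pkl on every call, so a dataset of N examples triggered N identical writes. This is a simplified reconstruction based on the diff, not the repository's actual code; the function body, label_list, and the driver loop are placeholders.

import pickle

def convert_single_example_before(example, label_list, middle_output):
    # Rebuilt from scratch on every call, even though the mapping never changes.
    label_map = {label: i for i, label in enumerate(label_list)}
    # Re-written to disk on every call as well: N examples mean N identical pickle dumps.
    with open(middle_output + "/label2id.pkl", "wb") as w:
        pickle.dump(label_map, w)
    # ... tokenization and feature construction would follow here ...
    return label_map

# Hypothetical driver loop: the same dictionary is dumped three times.
label_list = ["[PAD]", "B-PER", "I-PER", "O"]
for example in ["first sentence", "second sentence", "third sentence"]:
    convert_single_example_before(example, label_list, ".")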
BERT_NER.py: 18 changes (9 additions, 9 deletions)
@@ -236,11 +236,11 @@ def _create_example(self, lines, set_type):
         return examples
 
 
-def convert_single_example(ex_index, example, label_list, max_seq_length, tokenizer, mode):
+def convert_single_example(ex_index, example, label_map, max_seq_length, tokenizer, mode):
     """
     :param ex_index: example num
     :param example:
-    :param label_list: all labels
+    :param label_map: dict map from label to id for all labels
     :param max_seq_length:
     :param tokenizer: WordPiece tokenization
     :param mode:
@@ -251,12 +251,6 @@ def convert_single_example(ex_index, example, label_list, max_seq_length, tokeni
     labels: [I-PER,I-PER,X,O,O,O,X]
 
     """
-    label_map = {}
-    #here start with zero this means that "[PAD]" is zero
-    for (i,label) in enumerate(label_list):
-        label_map[label] = i
-    with open(FLAGS.middle_output+"/label2id.pkl",'wb') as w:
-        pickle.dump(label_map,w)
     textlist = example.text.split(' ')
     labellist = example.label.split(' ')
     tokens = []
@@ -322,10 +316,16 @@ def filed_based_convert_examples_to_features(examples, label_list, max_seq_lengt
     writer = tf.python_io.TFRecordWriter(output_file)
     batch_tokens = []
     batch_labels = []
+    label_map = {}
+    #here start with zero this means that "[PAD]" is zero
+    for (i,label) in enumerate(label_list):
+        label_map[label] = i
+    with open(FLAGS.middle_output+"/label2id.pkl",'wb') as w:
+        pickle.dump(label_map,w)
     for (ex_index, example) in enumerate(examples):
         if ex_index % 5000 == 0:
             logging.info("Writing example %d of %d" % (ex_index, len(examples)))
-        feature,ntokens,label_ids = convert_single_example(ex_index, example, label_list, max_seq_length, tokenizer, mode)
+        feature,ntokens,label_ids = convert_single_example(ex_index, example, label_map, max_seq_length, tokenizer, mode)
         batch_tokens.extend(ntokens)
         batch_labels.extend(label_ids)
         def create_int_feature(values):
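
A minimal sketch of the hoisted pattern after this commit, assuming the same label2id.pkl layout: the map is built and pickled once per dataset inside the batch-level function, then passed down to the per-example converter, which no longer touches the filesystem. The function bodies below are simplified stand-ins for illustration, not the repository's real implementations.

import pickle

def build_and_save_label_map(label_list, middle_output):
    # Start from zero so that "[PAD]" maps to 0, matching the comment kept in the diff.
    label_map = {label: i for i, label in enumerate(label_list)}
    with open(middle_output + "/label2id.pkl", "wb") as w:
        pickle.dump(label_map, w)
    return label_map

def convert_single_example_after(example_labels, label_map):
    # Receives the precomputed map; no dict construction and no file I/O per example.
    return [label_map[label] for label in example_labels.split()]

label_map = build_and_save_label_map(["[PAD]", "B-PER", "I-PER", "O"], ".")
for example_labels in ["O B-PER I-PER O", "O O B-PER"]:
    print(convert_single_example_after(example_labels, label_map))  # first prints [3, 1, 2, 3]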