From 7a14e5d352ecf45b848cacafe0de414011930c8b Mon Sep 17 00:00:00 2001 From: zhipuch Date: Tue, 19 Nov 2024 09:35:07 +0800 Subject: [PATCH] correct compute_metrics function --- finetune_demo/finetune.py | 4 ++-- finetune_demo/finetune_vision.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/finetune_demo/finetune.py b/finetune_demo/finetune.py index 2552fa0..683babb 100644 --- a/finetune_demo/finetune.py +++ b/finetune_demo/finetune.py @@ -374,8 +374,8 @@ def compute_metrics(eval_preds: EvalPrediction, tokenizer): batched_label_ids[batched_label_ids==-100] = tokenizer.pad_token_id metrics_dct = {'rouge-1': [], 'rouge-2': [], 'rouge-l': [], 'bleu-4': []} for pred_ids, label_ids in zip(batched_pred_ids, batched_label_ids): - pred_txt = tokenizer.decode(pred_ids).strip() - label_txt = tokenizer.decode(label_ids).strip() + pred_txt = tokenizer.decode(pred_ids, skip_special_tokens=True).strip() + label_txt = tokenizer.decode(label_ids, skip_special_tokens=True).strip() pred_tokens = list(jieba.cut(pred_txt)) label_tokens = list(jieba.cut(label_txt)) rouge = Rouge() diff --git a/finetune_demo/finetune_vision.py b/finetune_demo/finetune_vision.py index cafe7e6..056566b 100644 --- a/finetune_demo/finetune_vision.py +++ b/finetune_demo/finetune_vision.py @@ -420,8 +420,8 @@ def compute_metrics(eval_preds: EvalPrediction, tokenizer): batched_label_ids[batched_label_ids==-100] = tokenizer.pad_token_id metrics_dct = {'rouge-1': [], 'rouge-2': [], 'rouge-l': [], 'bleu-4': []} for pred_ids, label_ids in zip(batched_pred_ids, batched_label_ids): - pred_txt = tokenizer.decode(pred_ids).strip() - label_txt = tokenizer.decode(label_ids).strip() + pred_txt = tokenizer.decode(pred_ids, skip_special_tokens=True).strip() + label_txt = tokenizer.decode(label_ids, skip_special_tokens=True).strip() pred_tokens = list(jieba.cut(pred_txt)) label_tokens = list(jieba.cut(label_txt)) rouge = Rouge()