From 970cc1fb6f35af89e9236ef4a4b6c91e6bb5179c Mon Sep 17 00:00:00 2001 From: louishsu Date: Sun, 10 Oct 2021 12:53:08 +0800 Subject: [PATCH] 2021/10/10 nezha-legal-fgm2.0-lsr0.1_f82.73_p76.19_r90.49 --- main.py | 8 ++++++- main_local.py | 8 ++++++- scripts/run_span.sh | 56 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index 3f7c184..6bddf88 100644 --- a/main.py +++ b/main.py @@ -66,7 +66,13 @@ def main(): # n_splits = 5 # seed=42 # -------------------------- - version = "nezha-legal-100k-fgm1.0-lsr0.1" + # version = "nezha-legal-100k-fgm1.0-lsr0.1" + # model_type = "nezha_span" + # dataset_name = "cail_ner" + # n_splits = 5 + # seed=42 + # -------------------------- + version = "nezha-legal-fgm2.0-lsr0.1" model_type = "nezha_span" dataset_name = "cail_ner" n_splits = 5 diff --git a/main_local.py b/main_local.py index 0859bf1..11a3225 100644 --- a/main_local.py +++ b/main_local.py @@ -66,7 +66,13 @@ def main(): # n_splits = 5 # seed=42 # -------------------------- - version = "nezha-legal-fgm1.0-lsr0.1-ema3" + # version = "nezha-legal-fgm1.0-lsr0.1-ema3" + # model_type = "nezha_span" + # dataset_name = "cail_ner" + # n_splits = 5 + # seed=42 + # -------------------------- + version = "nezha-legal-fgm2.0-lsr0.1" model_type = "nezha_span" dataset_name = "cail_ner" n_splits = 5 diff --git a/scripts/run_span.sh b/scripts/run_span.sh index e748b4e..9a19fc0 100644 --- a/scripts/run_span.sh +++ b/scripts/run_span.sh @@ -565,6 +565,62 @@ python run_span.py \ --seed=42 done +# Further-pretrain LSR +for k in 0 1 2 3 4 +do +python run_span.py \ + --version=nezha-legal-fgm2.0-lsr0.1-fold${k} \ + --data_dir=./data/ner-ctx0-5fold-seed42/ \ + --train_file=train.${k}.json \ + --dev_file=dev.${k}.json \ + --test_file=dev.${k}.json \ + --model_type=nezha_span \ + --model_name_or_path=/home/louishsu/NewDisk/Code/CAIL2021/nezha-legal-cn-base-wwm/ \ + --do_train \ + --overwrite_output_dir \ + --evaluate_during_training \ + --evaluate_each_epoch \ + --save_best_checkpoints \ + --max_span_length=40 \ + --width_embedding_dim=128 \ + --train_max_seq_length=512 \ + --eval_max_seq_length=512 \ + --do_lower_case \ + --per_gpu_train_batch_size=8 \ + --per_gpu_eval_batch_size=16 \ + --gradient_accumulation_steps=2 \ + --learning_rate=5e-5 \ + --other_learning_rate=1e-3 \ + --num_train_epochs=8.0 \ + --warmup_proportion=0.1 \ + --do_fgm --fgm_epsilon=2.0 \ + --loss_type=lsr --label_smooth_eps=0.1 \ + --seed=42 +done +# main_local +# avg +# {'p': 0.9046767380798356, 'r': 0.8917144893289825, 'f': 0.8981488477521723} +# 犯罪嫌疑人 +# {'p': 0.963254593175853, 'r': 0.9653411728299551, 'f': 0.9642967542503864} +# 受害人 +# {'p': 0.9311955168119551, 'r': 0.9623552123552124, 'f': 0.9465189873417722} +# 被盗货币 +# {'p': 0.8584686774941995, 'r': 0.8087431693989071, 'f': 0.8328643781654473} +# 物品价值 +# {'p': 0.9754571703561117, 'r': 0.9698564593301435, 'f': 0.9726487523992322} +# 盗窃获利 +# {'p': 0.8848484848484849, 'r': 0.9106029106029107, 'f': 0.8975409836065574} +# 被盗物品 +# {'p': 0.8189794091316025, 'r': 0.7912125929769936, 'f': 0.8048565898293155} +# 作案工具 +# {'p': 0.7943166441136671, 'r': 0.7986394557823129, 'f': 0.796472184531886} +# 时间 +# {'p': 0.9484612532443456, 'r': 0.9251356238698011, 'f': 0.9366532405712193} +# 地点 +# {'p': 0.874439461883408, 'r': 0.8316747227750924, 'f': 0.8525211308656367} +# 组织机构 +# {'p': 0.8808618504435995, 'r': 0.8622828784119106, 'f': 0.8714733542319749} + # Further-pretrain 100k steps, LSR for k in 0 1 2 3 4 do