From 64f4da714a5e301bbcac876e9651dd2a082d3036 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=87=BA=E8=9B=B0?= Date: Wed, 16 Aug 2023 16:59:23 +0800 Subject: [PATCH] fix error commit & update pro --- PRO/README.md | 57 ++++-- PRO/{eval => eval_hh}/dp_config.yaml | 0 .../infer_and_eval_main_generate.py | 2 +- .../infer_and_eval_main_reward.py | 0 .../infer_and_eval_main_score.py | 0 PRO/{eval => eval_hh}/infer_func_now.py | 1 - PRO/{eval => eval_hh}/metrics2.py | 40 ---- PRO/{eval => eval_hh}/reward_model.py | 0 PRO/{eval => eval_hh}/run_infer_main_dist.sh | 7 +- PRO/eval_summarize/dp_config.yaml | 16 ++ .../infer_and_eval_main_generate.py | 95 ++++++++++ .../infer_and_eval_main_reward.py | 84 +++++++++ .../infer_and_eval_main_score.py | 47 +++++ PRO/eval_summarize/infer_func_now.py | 76 ++++++++ PRO/eval_summarize/metrics2.py | 70 +++++++ PRO/eval_summarize/run_infer_main_dist.sh | 16 ++ PRO/requirements.txt | 3 + .../{automatic.jpg => automatic_hh.jpg} | Bin PRO/resources/automatic_summarize.jpg | Bin 0 -> 67617 bytes PRO/train/ds_config2.yaml | 22 +++ .../step_1_process.py | 63 +------ .../step_2_gen_train_data.py | 16 +- .../step_3_gen_test_data.py | 14 +- .../step_1_process.py | 109 +++++++++++ .../step_2_gen_train_data.py | 118 ++++++++++++ .../step_3_gen_test_data.py | 109 +++++++++++ PRO/train/train3_summarize.sh | 28 +++ PRO/train/{train.sh => train_hh.sh} | 3 +- PRO/train/train_summarize.sh | 28 +++ PRO/train/utils/config.py | 7 +- PRO/train/utils/data_manager.py | 172 +++++++++++++++++- PRO/train/utils/{metrics.py => metrics_hh.py} | 0 PRO/train/utils/metrics_summarize.py | 65 +++++++ PRO/train/utils/process_manager.py | 47 +++-- 34 files changed, 1154 insertions(+), 161 deletions(-) rename PRO/{eval => eval_hh}/dp_config.yaml (100%) rename PRO/{eval => eval_hh}/infer_and_eval_main_generate.py (98%) rename PRO/{eval => eval_hh}/infer_and_eval_main_reward.py (100%) rename PRO/{eval => eval_hh}/infer_and_eval_main_score.py (100%) rename PRO/{eval => 
eval_hh}/infer_func_now.py (98%) rename PRO/{eval => eval_hh}/metrics2.py (64%) rename PRO/{eval => eval_hh}/reward_model.py (100%) rename PRO/{eval => eval_hh}/run_infer_main_dist.sh (90%) create mode 100644 PRO/eval_summarize/dp_config.yaml create mode 100644 PRO/eval_summarize/infer_and_eval_main_generate.py create mode 100644 PRO/eval_summarize/infer_and_eval_main_reward.py create mode 100644 PRO/eval_summarize/infer_and_eval_main_score.py create mode 100644 PRO/eval_summarize/infer_func_now.py create mode 100644 PRO/eval_summarize/metrics2.py create mode 100755 PRO/eval_summarize/run_infer_main_dist.sh rename PRO/resources/{automatic.jpg => automatic_hh.jpg} (100%) create mode 100644 PRO/resources/automatic_summarize.jpg create mode 100644 PRO/train/ds_config2.yaml rename PRO/train/{preprocess_data => hh_preprocess_data}/step_1_process.py (74%) rename PRO/train/{preprocess_data => hh_preprocess_data}/step_2_gen_train_data.py (93%) rename PRO/train/{preprocess_data => hh_preprocess_data}/step_3_gen_test_data.py (91%) create mode 100644 PRO/train/summarize_preprocess_data/step_1_process.py create mode 100644 PRO/train/summarize_preprocess_data/step_2_gen_train_data.py create mode 100644 PRO/train/summarize_preprocess_data/step_3_gen_test_data.py create mode 100755 PRO/train/train3_summarize.sh rename PRO/train/{train.sh => train_hh.sh} (92%) create mode 100755 PRO/train/train_summarize.sh rename PRO/train/utils/{metrics.py => metrics_hh.py} (100%) create mode 100644 PRO/train/utils/metrics_summarize.py diff --git a/PRO/README.md b/PRO/README.md index 331eaaca..b0e355dc 100644 --- a/PRO/README.md +++ b/PRO/README.md @@ -4,14 +4,14 @@ Authors: Feifan Song, Bowen Yu, Minghao Li, Haiyang Yu, Fei Huang, Yongbin Li, H arXiv: [Abstract](https://arxiv.org/abs/2306.17492) / [PDF](https://arxiv.org/pdf/2306.17492.pdf) ## Abstract -Large language models (LLMs) often contain misleading content, emphasizing the need to align them with human values to ensure secure AI 
systems. Reinforcement learning from human feedback (RLHF) has been employed to achieve this alignment by combining a reward model, typically based on Bradley-Terry paired comparison, with an RL algorithm such as Proximal Policy Optimization (PPO) to optimize LLM responses. However, RLHF exhibits complexity, instability, and sensitivity to hyperparameters. In this paper, we propose Preference Ranking Optimization (PRO) as an alternative to PPO for directly aligning LLMs with the Bradley-Terry comparison. PRO extends the pairwise Bradley-Terry comparison to accommodate preference rankings of any length. By iteratively contrasting the likelihood of generating responses, PRO instructs the LLM to prioritize the best response while progressively ranking the remaining responses. In this manner, PRO effectively transforms human alignment into aligning the probability ranking of n responses generated by LLM with the preference ranking of humans towards these responses. Experiments have shown that PRO outperforms existing alignment algorithms, achieving comparable results to ChatGPT and human responses through automatic-based, reward-based, GPT-4, and human evaluations. Furthermore, we demonstrate that longer, more diverse, and higher-quality preference ranking sequences can consistently enhance the performance of human alignment. +Large language models (LLMs) often contain misleading content, emphasizing the need to align them with human values to ensure secure AI systems. Reinforcement learning from human feedback (RLHF) has been employed to achieve this alignment. However, it encompasses two main drawbacks: (1) RLHF exhibits complexity, instability, and sensitivity to hyperparameters in contrast to SFT. (2) Despite massive trial-and-error, multiple sampling is reduced to pair-wise contrast, thus lacking contrasts from a macro perspective. 
In this paper, we propose Preference Ranking Optimization (PRO) as an efficient SFT algorithm to directly fine-tune LLMs for human alignment. PRO extends the pair-wise contrast to accommodate preference rankings of any length. By iteratively contrasting candidates, PRO instructs the LLM to prioritize the best response while progressively ranking the rest responses. In this manner, PRO effectively transforms human alignment into aligning the probability ranking of n responses generated by LLM with the preference ranking of humans towards these responses. Experiments have shown that PRO outperforms baseline algorithms, achieving comparable results to ChatGPT and human responses through automatic-based, reward-based, GPT-4, and human evaluations. ## The pipeline of PRO
## Results -### Automatic Evaluation -
+### Automatic Evaluation on *HH-RLHF* +
### GPT-4 Evaluation
@@ -19,32 +19,63 @@ Large language models (LLMs) often contain misleading content, emphasizing the n ### Human Evaluation
+### Automatic Evaluation on *Summarize From Feedback* +
+ ## Running! ### Data Preparation -1. Download [data.zip](https://ylab-mobile-prod.oss-cn-beijing.aliyuncs.com/yueli.ybw/pro_data.zip) and unzip it. -2. Place the unzipped ```data/``` folder in the root directory of the project. -3. You can also get the raw data from [this repo](https://github.com/anthropics/hh-rlhf), and run the following command to preprocess it to get the same data as ```train_len2/``` in ```data.zip```: +We provide the preprocessed data for training and testing, which can be get with following steps: +1. Download [data.zip](https://ylab-mobile-prod.oss-cn-beijing.aliyuncs.com/yueli.ybw/data.zip) and unzip it. +2. Place the unzipped ```data``` folder in the root directory of the project. + +Besides, we also provide the scripts for preprocessing the raw data. Please follow the steps below to prepare the data: +1. Create a directory named ```data``` in the root directory of this project. +2. Create a directory named ```data/raw_data``` in the ```data``` directory. +3. Download the raw data from [*HH-RLHF*](https://github.com/anthropics/hh-rlhf) or [*Summarize From Feedback*](https://github.com/openai/summarize-from-feedback), which should be named as ```hhrlhf``` or ```summarize_from_feedback```, and put it in the ```data/raw_data``` directory. +4. Run the following command to preprocess the data: + ``` -cd train/preprocess_data +# For HH-RLHF +cd train/hh_preprocess_data +python step_1_process.py +python step_2_get_train_data.py +python step_3_get_test_data.py + +# For Summarize From Feedback +cd ../summarize_preprocess_data python step_1_process.py python step_2_get_train_data.py python step_3_get_test_data.py ``` + ### Train -We provide the training script for training the model. For example, you can run the following command to train the model: +We provide the training scripts for training the model. 
For example, you can run the following commands to train the model: ``` cd train -./train.sh [id_of_exp] train_len2 2 + +# Train LLMs with HH-RLHF +./train_hh.sh [id_of_exp] hh_train_len2 2 + +# Train LLMs with Summarize From Feedback +./train_summarize.sh [id_of_exp] summarize_train_len2 2 +# Length 3 +./train3_summarize.sh [id_of_exp] summarize_train_len3_alpaca 3 ``` -You can modify the ```train.sh``` to train the model with different dataset. + +The scripts can be easily modified to train LLMs with different datasets. ### Test -You can run the following command to test the model: +The following command can be used to test the model: ``` -cd eval +# Test LLMs with HH-RLHF +cd eval_hh +./run_infer_main_dist.sh + +# Test LLMs with Summarize From Feedback +cd ../eval_summarize ./run_infer_main_dist.sh ``` -> **Note:** Before run this script, you should modify the ```infer_main_dist.sh``` to specify ```id_of_exp``` and corresponding ranking length in training. +> **Note:** Before running, the ```id_of_exp``` and corresponding ranking length (during training) in ```run_infer_main_dist.sh``` have to be specified. 
## Citation If this work is helpful to you, welcome to cite our paper as: diff --git a/PRO/eval/dp_config.yaml b/PRO/eval_hh/dp_config.yaml similarity index 100% rename from PRO/eval/dp_config.yaml rename to PRO/eval_hh/dp_config.yaml diff --git a/PRO/eval/infer_and_eval_main_generate.py b/PRO/eval_hh/infer_and_eval_main_generate.py similarity index 98% rename from PRO/eval/infer_and_eval_main_generate.py rename to PRO/eval_hh/infer_and_eval_main_generate.py index 76137b5e..ee02f464 100644 --- a/PRO/eval/infer_and_eval_main_generate.py +++ b/PRO/eval_hh/infer_and_eval_main_generate.py @@ -76,7 +76,7 @@ def get_args(): "helpful_online.json", "helpful_rejection.json" ]: - file_path = os.path.join("..", "data", "test", file_name) + file_path = os.path.join("..", "data", "hh_test", file_name) with open(file_path, "r", encoding='utf-8') as f: infer_data = {line_index: json.loads(l) for line_index, l in enumerate(f.readlines()) if (line_index-rank) % rank_sum == 0} diff --git a/PRO/eval/infer_and_eval_main_reward.py b/PRO/eval_hh/infer_and_eval_main_reward.py similarity index 100% rename from PRO/eval/infer_and_eval_main_reward.py rename to PRO/eval_hh/infer_and_eval_main_reward.py diff --git a/PRO/eval/infer_and_eval_main_score.py b/PRO/eval_hh/infer_and_eval_main_score.py similarity index 100% rename from PRO/eval/infer_and_eval_main_score.py rename to PRO/eval_hh/infer_and_eval_main_score.py diff --git a/PRO/eval/infer_func_now.py b/PRO/eval_hh/infer_func_now.py similarity index 98% rename from PRO/eval/infer_func_now.py rename to PRO/eval_hh/infer_func_now.py index 58a295be..89f4ac82 100644 --- a/PRO/eval/infer_func_now.py +++ b/PRO/eval_hh/infer_func_now.py @@ -62,7 +62,6 @@ def pipeline(prompts): text = text_res[index] assert truncated_prompts[index].rstrip() in text text = text.replace(truncated_prompts[index].rstrip(), "").strip() - # text = text[prompts_size[index]:].strip() for stop in ["Human:", "human:", "Assistant:", "assistant:"]: stop_ix = text.find(stop) 
if stop_ix >= 0: diff --git a/PRO/eval/metrics2.py b/PRO/eval_hh/metrics2.py similarity index 64% rename from PRO/eval/metrics2.py rename to PRO/eval_hh/metrics2.py index 4d232491..72c2c6f8 100644 --- a/PRO/eval/metrics2.py +++ b/PRO/eval_hh/metrics2.py @@ -21,47 +21,7 @@ def get_bleu(hyp, ref): ref = ref.strip() return nltk.translate.bleu_score.sentence_bleu([ref], hyp) -# Thank trlx for their helpful code: -# https://github.com/CarperAI/trlx/blob/main/examples/hh/ppo_hh.py#L115 -def create_reward_fn_1(): - reward_tokenizer = AutoTokenizer.from_pretrained("gpt2") - reward_tokenizer.pad_token = reward_tokenizer.eos_token - reward_tokenizer.truncation_side = "left" - reward_model = TrainRewardModel("EleutherAI/gpt-j-6B", reward_tokenizer.eos_token_id) - checkpoint = os.path.join("..", "rm", "gptj-rm-static", "hf_ckpt.pt") - - reward_model.load_state_dict(torch.load(checkpoint)) - reward_device = "cuda:{}".format(rank) - reward_model = reward_model.half().to(reward_device) - reward_model.eval() - - def get_score(prefixes, suffixes): - # prefixes = [[p1, p1, p1], [p2, p2, p2]] - # suffixes = [s1, s2] - texts = [] - for p, s in zip(prefixes,suffixes): - p = "".join(p) - p = p.replace("<|prompter|>", "\n\nHuman: ").replace("<|assistant|>", "\n\nAssistant: ") - texts.append(p + s + reward_tokenizer.eos_token) - - input = reward_tokenizer( - texts, - padding=True, - truncation=True, - max_length=reward_tokenizer.max_len_single_sentence, - return_tensors="pt", - ).to(reward_device) - - with torch.no_grad(): - rewards = reward_model(input['input_ids']) # [batch] - - return rewards.view(-1) - # return torch.sigmoid(rewards.view(-1)) - - return get_score, 16 - def create_reward_fn_2(): - # model_name = "OpenAssistant/oasst-rm-2.1-pythia-1.4b-epoch-2.5" model_name = "OpenAssistant/oasst-rm-2-pythia-6.9b-epoch-1" model_device = "cuda:{}".format(rank) tokenizer = AutoTokenizer.from_pretrained(model_name) diff --git a/PRO/eval/reward_model.py b/PRO/eval_hh/reward_model.py 
similarity index 100% rename from PRO/eval/reward_model.py rename to PRO/eval_hh/reward_model.py diff --git a/PRO/eval/run_infer_main_dist.sh b/PRO/eval_hh/run_infer_main_dist.sh similarity index 90% rename from PRO/eval/run_infer_main_dist.sh rename to PRO/eval_hh/run_infer_main_dist.sh index 08858a2b..d33fe064 100755 --- a/PRO/eval/run_infer_main_dist.sh +++ b/PRO/eval_hh/run_infer_main_dist.sh @@ -3,19 +3,14 @@ export OMP_NUM_THREADS=16 id=$1 ranking_len=$2 -# 30 min accelerate launch --config_file dp_config.yaml infer_and_eval_main_generate.py \ --index $id \ --stage $ranking_len > logs/generate_infer_main_${id}_${ranking_len}.log 2>&1 -#10 min accelerate launch --config_file dp_config.yaml infer_and_eval_main_reward.py \ --index $id \ --stage $ranking_len > logs/reward_infer_main_${id}_${ranking_len}.log 2>&1 -#1 second python -u infer_and_eval_main_score.py \ --index $id \ - --stage $ranking_len > logs/score_infer_main_${id}_${ranking_len}.log 2>&1 - -# total 40 min \ No newline at end of file + --stage $ranking_len > logs/score_infer_main_${id}_${ranking_len}.log 2>&1 \ No newline at end of file diff --git a/PRO/eval_summarize/dp_config.yaml b/PRO/eval_summarize/dp_config.yaml new file mode 100644 index 00000000..31c72646 --- /dev/null +++ b/PRO/eval_summarize/dp_config.yaml @@ -0,0 +1,16 @@ +compute_environment: LOCAL_MACHINE +deepspeed_config: {} +distributed_type: MULTI_GPU +downcast_bf16: 'no' +dynamo_backend: 'NO' +fsdp_config: {} +gpu_ids: all +machine_rank: 0 +main_training_function: main +megatron_lm_config: {} +mixed_precision: bf16 +num_machines: 1 +num_processes: 8 +rdzv_backend: static +same_network: true +use_cpu: false diff --git a/PRO/eval_summarize/infer_and_eval_main_generate.py b/PRO/eval_summarize/infer_and_eval_main_generate.py new file mode 100644 index 00000000..b2daeba1 --- /dev/null +++ b/PRO/eval_summarize/infer_and_eval_main_generate.py @@ -0,0 +1,95 @@ +#import some packages and reward funcs +import os +import argparse +import json 
+import tqdm +import torch +import torch.nn.functional as F +import metrics2 +from transformers import ( + AutoConfig, + AutoTokenizer, + LlamaTokenizer, + AutoModelForCausalLM +) +from infer_func_now import setup_seed, generate_pipeline +from accelerate import Accelerator +from accelerate.utils import InitProcessGroupKwargs +from datetime import timedelta + +def get_args(): + parser = argparse.ArgumentParser(description="") + parser.add_argument('--index', type=str) + parser.add_argument('--stage', type=int) + parser.add_argument('--directory', default="best_checkpoint", type=str) + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = get_args() + kwargs = InitProcessGroupKwargs(timeout=timedelta(seconds=5400)) + accelerator = Accelerator(kwargs_handlers=[kwargs])# **accelerator_log_kwargs) + rank = int(os.environ['RANK']) + rank_sum = accelerator.num_processes + model_name_or_path = os.path.join("..", "checkpoints", f"index_{args.index}", f"stage_{args.stage}", f"{args.directory}") + model_device = "cuda:{}".format(rank) + + model_config = AutoConfig.from_pretrained(model_name_or_path) + model = AutoModelForCausalLM.from_pretrained(model_name_or_path, config=model_config, torch_dtype=torch.bfloat16).to(model_device) + if accelerator.is_main_process: + print(type(model)) + print(model.config) + if model.config.architectures[0].lower() == "llamaforcausallm": + tokenizer = LlamaTokenizer.from_pretrained(model_name_or_path) + tokenizer.unk_token = "" + tokenizer.bos_token = "" + tokenizer.eos_token = "" + else: + tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) + + tokenizer.pad_token=tokenizer.eos_token, + tokenizer.pad_token_id=tokenizer.eos_token_id, + tokenizer.sep_token = "" + model.resize_token_embeddings(len(tokenizer)) + + print(model.dtype) + torch.cuda.empty_cache() + model.eval() + print(f"Rank {rank} is activated...") + if accelerator.is_main_process: + file_name = "test.json" + save_path = 
os.path.join("inference_res/cache", "infer_generate_main_{}_{}_{}".format(args.index, args.stage, file_name)) + if os.path.exists(save_path): + os.remove(save_path) + accelerator.wait_for_everyone() + + file_name = "test.json" + file_path = os.path.join("..", "data", "summarize_test", file_name) + with open(file_path, "r", encoding='utf-8') as f: + infer_data = {line_index: json.loads(l) for line_index, l in enumerate(f.readlines()) if (line_index-rank) % rank_sum == 0} + + for line_index in infer_data: + infer_data[line_index]["line_index"] = line_index + infer_data = [infer_data[line_index] for line_index in infer_data] + + prompts = [l['prefix'][0] for l in infer_data] + + setup_seed() + generated_suffixes, truncated_prompts = generate_pipeline(model, tokenizer, prompts, add_special_tokens=True) + setup_seed() + save_path = os.path.join("inference_res/cache", "infer_generate_main_{}_{}_{}".format(args.index, args.stage, file_name)) + + for index in range(len(infer_data)): + infer_data[index]['infer'] = {"t": generated_suffixes[index]} + with open(save_path, 'a', encoding='utf-8') as f: + for line in infer_data: + content = json.dumps(line, ensure_ascii=False) + f.write(content+'\n') + + accelerator.wait_for_everyone() + + print("") + if accelerator.is_main_process: + print("Eval on {}".format(file_name)) + torch.cuda.empty_cache() + accelerator.wait_for_everyone() \ No newline at end of file diff --git a/PRO/eval_summarize/infer_and_eval_main_reward.py b/PRO/eval_summarize/infer_and_eval_main_reward.py new file mode 100644 index 00000000..4848430a --- /dev/null +++ b/PRO/eval_summarize/infer_and_eval_main_reward.py @@ -0,0 +1,84 @@ +#import some packages and reward funcs +import os +import argparse +import json +import tqdm +import torch +import torch.nn.functional as F +import metrics2 +from transformers import ( + AutoConfig, + AutoTokenizer, + LlamaTokenizer, + AutoModelForCausalLM +) +from peft import PeftConfig, PeftModel +from infer_func_now import 
setup_seed +from accelerate import Accelerator +from accelerate.utils import InitProcessGroupKwargs +from datetime import timedelta + +def get_args(): + parser = argparse.ArgumentParser(description="") + parser.add_argument('--index', type=str) + parser.add_argument('--stage', type=int) + parser.add_argument('--directory', default="best_checkpoint", type=str) + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = get_args() + setup_seed() + kwargs = InitProcessGroupKwargs(timeout=timedelta(seconds=5400)) + accelerator = Accelerator(kwargs_handlers=[kwargs]) + rank = int(os.environ['RANK']) + rank_sum = accelerator.num_processes + torch.cuda.empty_cache() + print(f"Rank {rank} is activated...") + if accelerator.is_main_process: + file_name = "test.json" + save_path = os.path.join("inference_res", "infer_main_{}_{}_{}".format(args.index, args.stage, file_name)) + if os.path.exists(save_path): + os.remove(save_path) + + save_path = os.path.join("inference_res/cache", "infer_generate_main_{}_{}_{}".format(args.index, args.stage, file_name)) + with open(save_path, 'r', encoding='utf-8') as f: + infer_data = [json.loads(l) for l in f.readlines()] + if "line_index" in infer_data[0]: + infer_data = {l["line_index"]: l for l in infer_data} + with open(save_path, 'w', encoding='utf-8') as f: + infer_data = [infer_data[line_index] for line_index in range(len(infer_data))] + for line in infer_data: + content = json.dumps(line, ensure_ascii=False) + f.write(content+'\n') + + accelerator.wait_for_everyone() + + get_score, reward_batch_size = metrics2.create_reward_fn() + + file_name = "test.json" + save_path = os.path.join("inference_res/cache", "infer_generate_main_{}_{}_{}".format(args.index, args.stage, file_name)) + with open(save_path, 'r', encoding='utf-8') as f: + infer_data = [json.loads(l) for line_index, l in enumerate(f.readlines()) if (line_index - rank) % rank_sum == 0] + raw_prefixes = [l['prefix'][0].strip() + " " for l in infer_data] + 
generated_suffixes = [l['infer']["t"].strip() for l in infer_data] + + setup_seed() + rewards = [] + batch_size = reward_batch_size + for index in tqdm.tqdm(range(0,len(raw_prefixes), batch_size), desc=f"Rank {rank} rewarding..."): + if len(raw_prefixes) - index < batch_size: + batch_size = len(raw_prefixes) - index + rewards.extend(torch.sigmoid(get_score(raw_prefixes[index:index+batch_size], generated_suffixes[index:index+batch_size])).cpu().detach().numpy().tolist()) + assert len(rewards) == len(generated_suffixes) and len(rewards) == len(infer_data), (len(rewards), len(generated_suffixes), len(infer_data)) + + for index in range(len(infer_data)): + infer_data[index]["infer"]["score"] = rewards[index] + infer_data[index]["infer"]["bleu"] = metrics2.get_bleu(infer_data[index]['infer']['t'], infer_data[index]['suffix'][0]) + + save_path = os.path.join("inference_res", "infer_main_{}_{}_{}".format(args.index, args.stage, file_name)) + with open(save_path, 'a', encoding='utf-8') as f: + for line in infer_data: + content = json.dumps(line, ensure_ascii=False) + f.write(content+'\n') + print(f"Rank {rank} completed!") \ No newline at end of file diff --git a/PRO/eval_summarize/infer_and_eval_main_score.py b/PRO/eval_summarize/infer_and_eval_main_score.py new file mode 100644 index 00000000..7fddc118 --- /dev/null +++ b/PRO/eval_summarize/infer_and_eval_main_score.py @@ -0,0 +1,47 @@ +import os +import argparse +import json +import tqdm +import evaluate + +def get_args(): + parser = argparse.ArgumentParser(description="") + parser.add_argument('--index', type=str) + parser.add_argument('--stage', type=int) + parser.add_argument('--directory', default="best_checkpoint", type=str) + args = parser.parse_args() + return args + +if __name__ == "__main__": + args = get_args() + + file_name = "test.json" + save_path = os.path.join("inference_res", "infer_main_{}_{}_{}".format(args.index, args.stage, file_name)) + with open(save_path, 'r', encoding='utf-8') as f: + infer_data 
= [json.loads(l) for line_index, l in enumerate(f.readlines())] + + bleu = 0 + avg_reward = 0 + predictions = [] + references = [] + + for line in infer_data: + avg_reward += line['infer']['score'] + bleu += line['infer']['bleu'] + predictions.append( + line['infer']["t"].strip() + ) + references.append( + line["suffix"][0].strip() + ) + + rouge = evaluate.load('rouge') + results = rouge.compute(predictions=predictions, references=references) + bleu = bleu / len(infer_data) + avg_reward = avg_reward / len(infer_data) + + print("Eval on {}".format(file_name)) + print("BLEU: {}".format(bleu)) + print("Avg Reward: {}".format(avg_reward)) + for key in results: + print("{}: {}".format(key, results[key])) \ No newline at end of file diff --git a/PRO/eval_summarize/infer_func_now.py b/PRO/eval_summarize/infer_func_now.py new file mode 100644 index 00000000..cf7e63f0 --- /dev/null +++ b/PRO/eval_summarize/infer_func_now.py @@ -0,0 +1,76 @@ +import torch +import torch.nn.functional as F +import tqdm +import numpy as np +import random + +def setup_seed(seed=42): + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + np.random.seed(seed) + random.seed(seed) + torch.backends.cudnn.benchmark=False + torch.backends.cudnn.deterministic=True + +def generate_pipeline(model, tokenizer, prompts, add_special_tokens=False, gen_kwarg={"max_new_tokens": 64, "num_beams": 1, "do_sample": False,}, batch_size = 28): + def pipeline(prompts): + tokenizer.padding_side = "left" + tokenizer.truncation_side = "right" + + new_prompts = [] + for p in prompts: + assert p[-7:] == "\nTL;DR:", p[-7:] + p = p[:-7] + new_prompts.append(p) + + model_inputs = tokenizer( + new_prompts, + max_length=512, + truncation=True, + add_special_tokens=add_special_tokens, + ) + truncated_prompts = tokenizer.batch_decode(model_inputs['input_ids'], skip_special_tokens=True) + truncated_prompts = [p + "\nTL;DR:" for p in truncated_prompts] + model_inputs = tokenizer( + truncated_prompts, + 
add_special_tokens=add_special_tokens, + padding=True, + return_tensors="pt" + ) + truncated_prompts = tokenizer.batch_decode(model_inputs['input_ids'], skip_special_tokens=True) + prompts_size = [len(s) for s in truncated_prompts] + return model_inputs, prompts_size, truncated_prompts + + model_inputs, prompts_size, truncated_prompts = pipeline(prompts) + text_res = [] + for index in tqdm.tqdm(range(0, len(model_inputs["input_ids"]), batch_size)): + if len(model_inputs["input_ids"]) - index < batch_size: + batch_size = len(model_inputs["input_ids"]) - index + + batch = {key: model_inputs[key][index:index+batch_size].to(model.device) for key in model_inputs} + with torch.no_grad(): + ts = model.generate( + **batch, + **gen_kwarg, + pad_token_id=tokenizer.pad_token_id, + ).cpu().detach() + text_res.append(ts) + + for index in range(len(text_res)): + text_res[index] = tokenizer.batch_decode( + text_res[index], + skip_special_tokens=True + ) + + text_res = sum(text_res, []) + for index in range(len(text_res)): + text = text_res[index] + assert truncated_prompts[index].rstrip() in text + text = text.replace(truncated_prompts[index].rstrip(), "").strip() + for stop in ["\n\n"]: + stop_ix = text.find(stop) + if stop_ix >= 0: + text = text[:stop_ix].rstrip() + text_res[index] = text + + return text_res, truncated_prompts \ No newline at end of file diff --git a/PRO/eval_summarize/metrics2.py b/PRO/eval_summarize/metrics2.py new file mode 100644 index 00000000..a6528c7c --- /dev/null +++ b/PRO/eval_summarize/metrics2.py @@ -0,0 +1,70 @@ +import sys +sys.path.append("..") +import os +import math +import torch +import torch.nn as nn +from transformers import AutoTokenizer, AutoConfig, AutoModelForSequenceClassification +from transformers.models.gpt_neox.modeling_gpt_neox import GPTNeoXConfig, GPTNeoXModel, GPTNeoXPreTrainedModel +from transformers.utils import ModelOutput +from dataclasses import dataclass +from typing import Literal, Optional +import tqdm +import nltk + 
+rank = int(os.environ['RANK']) + +def get_bleu(hyp, ref): + hyp = hyp.strip() + ref = ref.strip() + return nltk.translate.bleu_score.sentence_bleu([ref], hyp) + +def create_reward_fn_2(): + model_name = "OpenAssistant/reward-model-deberta-v3-large-v2" + model_device = "cuda:{}".format(rank) + tokenizer = AutoTokenizer.from_pretrained(model_name) + tokenizer.truncation_side = "right" + reward_model = AutoModelForSequenceClassification.from_pretrained(model_name).to(model_device) + reward_model.eval() + + def get_score(prefixes, suffixes): + input_content = tokenizer( + prefixes, + suffixes, + padding=True, + truncation=True, + max_length=1024, + return_tensors="pt", + ).to(model_device) + with torch.no_grad(): + rewards = reward_model(**input_content).logits + + return rewards.view(-1) + + return get_score, 140 + +def create_reward_fn_3(): + model_name = "OpenAssistant/reward-model-deberta-v3-large" + model_device = "cuda:{}".format(rank) + tokenizer = AutoTokenizer.from_pretrained(model_name) + tokenizer.truncation_side = "right" + reward_model = AutoModelForSequenceClassification.from_pretrained(model_name).to(model_device) + reward_model.eval() + + def get_score(prefixes, suffixes): + input_content = tokenizer( + prefixes, + suffixes, + padding=True, + truncation=True, + max_length=1024, + return_tensors="pt", + ).to(model_device) + with torch.no_grad(): + rewards = reward_model(**input_content).logits + + return rewards.view(-1) + + return get_score, 140 + +create_reward_fn = create_reward_fn_2 \ No newline at end of file diff --git a/PRO/eval_summarize/run_infer_main_dist.sh b/PRO/eval_summarize/run_infer_main_dist.sh new file mode 100755 index 00000000..d33fe064 --- /dev/null +++ b/PRO/eval_summarize/run_infer_main_dist.sh @@ -0,0 +1,16 @@ +export PYTHONIOENCODING=utf-8 +export OMP_NUM_THREADS=16 + +id=$1 +ranking_len=$2 +accelerate launch --config_file dp_config.yaml infer_and_eval_main_generate.py \ + --index $id \ + --stage $ranking_len > 
logs/generate_infer_main_${id}_${ranking_len}.log 2>&1 + +accelerate launch --config_file dp_config.yaml infer_and_eval_main_reward.py \ + --index $id \ + --stage $ranking_len > logs/reward_infer_main_${id}_${ranking_len}.log 2>&1 + +python -u infer_and_eval_main_score.py \ + --index $id \ + --stage $ranking_len > logs/score_infer_main_${id}_${ranking_len}.log 2>&1 \ No newline at end of file diff --git a/PRO/requirements.txt b/PRO/requirements.txt index d520bbc5..edce7075 100644 --- a/PRO/requirements.txt +++ b/PRO/requirements.txt @@ -8,3 +8,6 @@ torch==1.13.1+cu117 tqdm==4.64.1 transformers==4.28.1 deepspeed==0.8.1 +evaluate +rouge_score +tensorboard \ No newline at end of file diff --git a/PRO/resources/automatic.jpg b/PRO/resources/automatic_hh.jpg similarity index 100% rename from PRO/resources/automatic.jpg rename to PRO/resources/automatic_hh.jpg diff --git a/PRO/resources/automatic_summarize.jpg b/PRO/resources/automatic_summarize.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1646f250e3107eade1996f6f6271a3a9c8faa137 GIT binary patch literal 67617 zcmeFYcUV(Rw>P{)@4ZOxy-7zwA|g#hdKV(S89EX)CJNG>@~Ay&szI8Yi5o<9W4V~=gh6l z0T2iPEUEv1BMflb3>Fdy05&#&G5`Qf04OyCprcaMB7iV80|3oqntwPnagS;Lp+Td+ zZysUUMXgLtA$AV7=2qv;{u0g3<`WrtnT87hpy9F64i;x5-P}DS8P}-NIRRFH9^m)! zi-|O`vpfG=!avvF^uH2e`mYuMlWM5Y^N>BZ!gZ`$!{ic2YrpbSmadI#P06MBxOl&?r(Ln&f zCQ9W?UhxZ}%3(92(#OO7L&B-_@Aeu)p%MO6x{69GQp-eA>EH4VeE*Z2p#Mbs`1t;j z>EjdhhyGg^>XX!p&xXWAUiL}&>yQ68f1sD+sQvXTc_@_Nr zgH8YNqvIU@v_Bx+;>DY z&e{DVpK9ZFfFxiBXaV|wHV_pX0Hcmo05FY+Oo$E%3XYXDp;}jfq*b_|x~insaV>2C z_-(7d%K*TSPk-4UNPy{Iyr_@VAuK_?x%w}j&j$c#uK@twU;pCC9H$N=s zKqhb-CJjem$4!Q&i0)>O(KyXkJC=-+qDh53Py#&>QT0vc)cc5|5EC>Vo z4%!7B(9qJb(eTlT(a6(i&=}B|&{)$r(Rk4W(L~W)rMX3uLvxSjDNPkk6HOP*FwIAr zWtt6|Jz9X4l~#aOidK!*fYy}OmezwdkT#kYL7PEaMEjJshPI7%fOe90nRb)*7aao~ zFP$Wv8l54XC7m;!A6*n3f-Z}$n684ZiLRG!k`67cq%5aS#i{TLiis3E8IKv9V9wQ^85Tgp? 
zDMnjHAI4b56vq3ERg7rH3C2}MA`>f<1d|q%Ig7qcAm zNoHGSf93?{T;}J@ZOmiLtIQ-8ZWaZWQ!Gv_p)5C9?y=Oc^s#(q`N7J{D$RO=)t)tk z^#krY^H1%+2Yu8*vi@7vdyynU}tBSXFtvE!5+(=!~T-JhkcQq z$id5@&SAyj&w=E4z|q7p!LiNB%qhod!s*R66dH8ksUHD=A5BT5kFALBL$O~8rTo%X`s1ukLI1m&QG!YCC zOc$&XoDkeUCUWfbv4CS4$56*U9y<_{6fzSE6Uq~67FrOd6IK?s7mgP$5$+S-6yXyw z6!8z;) z!O2R<+Q`CXD`n^8nB@%Qg5~bY4an`wE6BUbr^+|We^WT7V5M+Xp;FmcMXgc| zqb{I+UOiF0S$$hWR>MOhPh&uXa$NWL<>Swf&uj8%p3_X!Y}4G;Qr7a*dZ0C>&8}^( zjnr<@#_Oo)1n89LeA4CBwb4!0?bbci)7OjEtI_+eFROn^|Dpbj0iS`LL6*Vb3HlRf zP9RToocMK8?_})Bx|2JGYKCEkuMEE#DHsJ9JvUl8C4I{0)RR-o#*)UDj2{~>gQdVe z;HO~hX_?dhr(c|2Gf^@LHK{V$K6Cs`^qIyp`=%#NubaL-OMBMhY{uD9GafT%vtqMF zb7}J+^Gb7^g|5Xli?^2ame!VemNQmjR(@75t#Ie`&s{&)Yt3fuXkBcLJ+FK|`h1%W zjm}zHce`-9Ty%NqvhRAJr(<*5|3up0AbfLtmVqso!0{4gb^r zcl^HxoC?SfSPMKAm>>8p=u}Wa(Dz_)a8dAPh-pZ12tL#@v@~=-%r>k%?C7%VWfYVN zdI{PZ&J!LM-Wwqv0gD)qREbQ9T#h;!RTzbfwvK)g1B&sAX^G{df};0vigC$tm@CFt z9$X>DyTsSSxMAV2_X)}g=?QCB&99bSqr2vJts5={zX``&H@W@<0Ydm9x{)%-TgcTL z7B?zxvfK>2Ig)rhF+cHVl50}iEwNiSZmlF+Bv+ZL)r{7Pf zWcX(cWggEg$|Ps`WesI(X5Y&`$_dOF&DF~-xy^7p{PuJnIIkj~JO66_N`Xy5(;dk> z8FzjbLJHp%=@gaTWw{%7clqA=doA~6@87;pE)FjK_~6U~)I-sS=@0iG`9GQ{IbBjy zDps0Vdhj^p@uw#iPnw>}KP`I3@a)R7ug_hc50n{{y?i13BJ;&zd3gD9g+oQ}OZ}HG zUx~iTt)!`ptNdPdvFbzh+3J=WwVEd=0aWH|;C0;V_1a6d({;O6BP_m=V2bFJNNhHZ`Q>h0wnQXLQ8@W09JWa+%sd4#@--g_JQcC#y} zYo!~~J>TQjGu3O~JKAU6H`s69-!ot`fF3+G*zxYbN%kf+p3F$Tcc^sOMzN-RK7t+{Mfybh{DlQ*slPOV)Op8lIRLP` zQ?UaTL$G-Mjivu80s0&NQ)$qD(qfW*^*QxgbI&OL%DNPdwUV9%#=_o#}n|V)|WX{xf@jHy;WVL;x6PZ~A z1dj;`%gHM!DkggMtIB9y;%-q7#>YSsKvx_Sgp85F)1O^3%gvP|iU5SS!Tur)_ zoRXTBo{?X0r?BYmz5B&apFJ;oQC{)#Rb72UV^ecWYg>0uZ(skw;JcxT$*GUiGoNPX zFxZtZt6$f?eP74#{`k2^*eCuv_^lTRp#58|KbrkFy|}4*(a_P+(lP$l3q%u7&9vNf z^irA(JSO&xK2f~VS~r>a&g4C*?P8YEcEIubMvt=y$m(F^@V{02OSAu(Vu}Apn*CAi zUwX|0rU1=fiI#?jmY$ZDmY#v0ni!a<$cur2iG}H}#PT1B?XSe~J8}L=M^qsosth_h zI!5Y$E>>n%uK&mBXn~5L6_4fsHd+vMFwt@YVBnDaG(!>i|8S-~`tZArE5_0=TZw1%zVu z?0@4t<{bCgr*mSHK93!zAICJy%`_qX#DSwVilNoBZ?T*&QUgH2hoNALsp(QkD5rl`X&K+Y*&DvIgF^ 
zL*~vbEng^$UuL}@kd9iOhWZB!D~;4@pOOAy&-S?Ddiz!XtgLb}zim&O{w=VNF0M3H zt!UV4T;r#;iUD7Kt>-*qTb4duR#{OrANIBJ2v~mT&5An$dXYp0@APF(eiK5qM6dk@ zK`<&s*+b;P#=WI5b(SU$QyL}%$*(uS7gV>+Rv!w6>ZPx^0~^^r29>(x1G2$k*%1(d z6hTZZ@f-mGW2xRc6MLSetm@4HKG!Nkri(Vhi)|+Q_}`Iq(IYC!-1L#-NlI578(0WS z{P0WE&M1JZ5NVgiIQuBVtsgt3VdcH9E_V-#3VniJE`}VOOE5kiyyd)U_P%U&R;JRN zNAoNDm)jtcA%vN@L0Fn3^OJmt5W=}5;Fdap62a9+Yg|)s*{v8OepV@BwNy`*2|jDo zH=DA&dE%DclMfHKd+6i_da^v2n=0E2ZHC~33nO~n>vC>Qsi(v-1vB+;!e@9Q|g7Ly5hxa zFc++njHQ#L`K?FegK|AvJ(u*BIja!#h|dCG5o)ESX~_CARANLdl4wHzRoChTM>9kJ zcc?#)$?D6bYsTN|c3Mpa9s8~?Kr$hnfRj&y<(fCd=KD#lpoewcz zN971$ge9^YkM&F!2R@v$_NaGfb}=>LScZa;3v_;?TL@@?F86_ zu{zmqMr|*1iDxVpXAx0eeWM{~I;a7b^G?+IzEnI99IXO4afPi$5S3FmC-DW?!qS_a zI%H1k2Z!;P$i9!Gtg7y{>Z-jrv=as9;EAL=x3h zQB1J@0Zf1T$(}f@Ex*_Gg6j>AXZBg?IN{>cY9Eq&^JRE;CGGSuZ^0faBV=ZL*jors zV=4bP1k2QnV`-O1D#7K&c%p9(&b-d(+@a`;EmZS%i*f=3T8<)t_=x7AXoR2};y2ez zdHkjtA#@=gIhB|(;qRIrLOVK@`ZTBeX98;T@>a}B=pEW;Q-qvmxQ_rj?+->X6)?ZP z5~T+Gt)ddKhUpAQaH(&iGrGiDvnjAWGAQdwfT7)uD?g?_Fz%eH_H z;?O-{ez>aTB-|!||4VSMcK3TtS#=8$>}{Qh9DOC!EihCfQ4i8!(XVERvFS$5?Lkt# z6$@Ivk- z>+5Kw+v?0tNo<)P&z$NQ`F>i$4>5_xIYDR4;&DrYZ zCJM7}gO_4dBZPc8>6=!h(UwO1uZYh)aUV-wuBb7?bt^Bh+LTw%dH)i9^!R;7#(Q(- zx#OHbN%^nnLo`Bs1HwUu;sR&-h)BV(1ILX{$=gVA)T{OA&kGu)TJ1E)aT)tL^d<1} zQ*e`QCOF7+xikgMwq5nJln=HZ^EGt;TmJla7c1$rK}u5GMXF!Ep0;-u&C6#34)sy` zwZz8dQ?}TWGF;GwsiL*Mt9p+FDzAkdQ&rnqL#l%{2ZrkO*?0|`Zuk1!DSe-tD$$6} zcyd@%c37l}P<1EU;l{FcLS#R5IMQ7W7#Y3$D$_kcNhV~NziM4rL0T_2QjMpeIVvIt zy!_xKV;?6s;uDg;WDF~}P_|ttn_amu7DWH^V9GA^T&^f8s-@>^@(CCvU~CBXm~(QB zI@Fs>$*SnN7^lOcy`9v3aUncMmWJ6#*OYJj4c+1tsVirblM-1T=5Wdwxqfp->Kh-M zq$q?g;argE!miym9zCaLcH}<#3Ku=OJd-#3cKY2{8?hFDo}p*r9)@)9hW%+cr;mWw zL0e(?nLcm>xDPVhOt^`s6IXiBMe9_2%FV+haB$9S-Tk7Jz=rU#QslkXGsB7c+&2KQ z-#1jh#!Yl1;}$&$3QJm1NbeLT_S>dV1c}!}L|}-~8Qa>nd;)g)Vq-^f=3hI#O6v7 z;iT9?Uw?J%E1SLEp=9ZVyntVkG5i$XrF7X7>V0j8 zXs&Z2b`a%Tv^eChHbc7b)%=J4h$^%o;LsG4=uWH_Vq+>=-w<_C1%y9}6Ye{zKNtC^}S~mUAyqR)3m$9$p?N7(PugooNmEi@k 
zcN2{ucv}oO2TKJXPR+v@6lZyI&0f&7^yF-z$2L)>9|aq7(Mbxs!-nhFB4G4a&`TPsGh;%lM!_o!FPpCr89RZG$jkdEQD{Kb= zpI!4blJ^2MSgSHbHfti!*DX`>je+gE z0k8liKAypEmC{xbwlhuA+t>&VyR@{EK(}w-nuIz%oLms1Xwqvt9*q~c*AG{%5ty z_93-7sigD7cw1ZMuoCf);5xGB3$KuX!YA==6NHc9QPMOMX1hGDTE`c5{cj*f#vz4E z)QQ`c0^SfKk=;qR-g%6e?T_Z@RIv#Qi;Gb&4U_w<**1R!l%Ki4*C#zHw)_0-RT?3u z>#o;Uz-b4~TLDnqMt9p6BrXkx?8&SorcBM5$!5kR=-dv$$qle&AI}MK6UF!*dm9-{ zyB?~7|A8&tQXn~#wGK<;Z08iP#_ccQ#&{>;Rxj(WARfEED{W}rat8kVBTh zLRewBJ!&t#Y{e@duj@q))6GO^-BT!{&v2EtGb)SFx!o&8t9#&M1|W@>>eLJIyxm}& zy=@v&knD_`?7wPz3mzX|(^J0m85ZPZIYY9w&$$-Vas*h&^Z7&WFR_Y8uR8IZ+Muv3 z9{~-kJ1HF^n5CpECqFJF#mQW0!+z7WQJomO*UNqJ@R5fJtJ3wy*;z|49xm6->wF;c z5d0iYBCBC%ZeTgSZ!D#>zC5EtYWWmbunn}#r0fa$UdT+gZqJCQx{FH6XXMKI*?7QO z%I@V^W=Wf^dWe@~(cDxe#)b1U{br zgVfb!(h(44wM?x|ypUR(-8AClI+(p9AY$3;!Y*N2&89>5)v?t3egwDGOKQVRCC(DY z$N5q&t6NfTR6OQf+b1ZE8_U29DJ=AON9;}}MT9tDhAYn=NcZR_Z1h}x7)<}QO#BJc zUA$Tn--J?0ilmUcWnHHevxU>`Xvh%J3vno&yEkhwHF_xU@8`$PJ~q-{Pg-L%%*S6h6q z+;>;~-C0wwllBEvZQvyQK9(8^Is%d)O&Yg4%$^Y~dc`$}$T*CCioNA9MB~=PTbDa~(GCbZ|bUj2g3`r{Gb>Pg5Q7y8Y?bp3k|5 zNb6bT9IWrMA~lPstBw7?_+*mpAYlnqk6S=GN%kZPl3a;0gbHHZ@&-DsQ@A$O(&ij1 zOnf^+|9rExkOOSoKfrzKnN0zIfIIVN0}=G&3`N@IF?!g1AG*Oc$tc`N3YYN%GnUdQ z!M-khy>TE7>N^=1c6ec1I_>uS*^#CR&0%X9_~sCIWK|f(mMEt}zDzVzNJFqtT_}t0 zJae2HG$LXTdTwD;XG6rG`n?0Emh<}=c4_H8zEIN?buVXTaQl=U$c*oQ%ZzKOy_}gm zAreh@lXZJ>64|<(oWaK8(+!4O9%J1H)gyOYk@mH-MOp1%#|w9)L_T> zqm*iTCb*eCjqK6}g_O96WF(tW0G8dbi$K%AzD?CWEJE$QYyRK{U3NHK1%syYSG_^S z_a{(3RBw2f%tsBin8J{lwJlAYWA{P-6L9L&V(MhavSXqKsvGQQb*t7gmXtOTPetVd79vm$7Wy6m%<2C6_%k z@9fjBaydc)?BT+S3wL)6L`>)jH!Q13Vni7%J!k^}|TY_Cnyl9L*y5;Gmx?h5c;N{{ap?QKJzH+_TbPjGh z(+Jh@s{Qaq^5XRZ%Z#sb>6FW|y93t@({e`&%`Tr%Mo)NAeeVL<1RvkGln%k!aVB9ly3XQDHHUqzQM!jkYfD93wid>kG^X_orXKbQz@{v0bB9PDOynRvrn-vFb#% zLd$NlT+zMCx%p@Xoa3?Iv&4-UD5VCf;MM`JJUtePh~Ylkb8lKFbyFe`7dEr!Qas>V-C+=0|KXBs7twse98=k`3K&!XJ7a@G9AL*h{4YO^bB+_ zSup#;%gPsL=wB$6y~O%)e)!@;UQz^gnDZbeAY7g5h;cN%F?+}&6H#o%I)N;My)Llf 
z;JKwUZ+LNbZnT=e;c~x~6lVBh+W-$dqt~zya_O+s2;vik*Itf2D8JF+a%bk?z4hgbse;uml~vph3u(Is$^nX6O5h zg}q@(=b9!Wen#+ne75zk^$_icer+_4R=V73Hm6_Y>lw)c`iiUz>d&1Wa3vO269dQ2 z;A^t+GFZEyj;W6L^6ExPBY`z0&9CV5gE+3YRW6DXaxCS*ZI$jLAW$Q{R2=CuFGh9o z#9UlNU&)DypKr{+LfAto!jmJP1??^lLU>LDq;uc>k<7Vo70 z)DTB}peRl3Wt;Z!jKq|M7)#XV51(snFJ^2Ednx|aeLLvBPm$ZCYFrK=O- zw?MdO7}Q&hSq&_>d*@~+XsWmK#Oo=wt%fDe?`=6%KOhU9OmzA8T##~Sl7}q^pQ|%? zCLM56-=H0$PeLS|!bBNHuoDCDw3wh?>j*Alf`$W@$w;E|LiL*W*vVsG-1J1BXHI2_ z=3YHWB_t60@O~@ty|yB-q+Vmz8LEF{HA@^CFnf(>csMOMmp$1~=jGitbn_!Wo&8gl zy4}8O!E1!f46phe&c2~;A=g+4} zmk3F7im$mWG(5h{bxMYCb{ZTkG0ThPs3j@Gy>O_s4tE@OBARjx?|uKd_qtvaZgf6h zXl_pF%GqDT8uHBgwD<0NAuk0#=IjF}cAg;4lNezeSjH|YqLsm0?V zx#|9rrmh{mf*(K4@=Cw~vl6++QS}0C_7ltoTUkMOEeU=@iZ{gK_E5P#ZH>+2{P|(o z8a5>$HeakiW_vjNQPZQ^A}5D)0ug1@+PZCTmDXDHA%@hW0%L%;%*#ZNhUDV{PQ&Q5H6h8S&L;xW3KVw(x!svfP3 zBlK#Xw4#4H7jpYND)Hu}6ume}XoBp>^;znt;>2_lM93(FBo22XCSWBJy|q44^uwY% z7A5iOW#yP~wQ3Z$HOD|*K1%0_w5_^|wcD)8a0Sb(aSuMXm!eI)`4};{f8g4U5~G?2 zN8E^r{q$2RwB+d!xEenzU}e&r;SB9p(vv@1<{D+@dSDqvl^J(rTMc zsp~?ehaKA|w{`AsW30SKzx9Tz%}(+1yZ4YJSU zXNe$=`c+JmoHhi1*Y{iqgo^^zdY`R?!d;Ii%#6KK+RPmIkM;Z*@7} zJpvT$j{tVFO5`{fF_Q`)Dm5qoCA)4cT>n7`_QGIXN{$) z%aE^zkAQ;8^*Ke_RSjaOnkOYKwHHF>cSr8VN#LeDnh&`-)6hiax+CDr`ax3$@XMMa zoau4|Je9%=QsIpYg#5^YtSeA`1e`U+YEW3J(7#T0R{y(!|L(zmkHLQ%$-mvq|J+e? z#CIbk3;Mi8U*p;4YzQg)Vz@ciatpH1#Lu_J6u% zlor267$cTHJpy_{;MznP{7kPtMTn?f5nO#eis1RKVZ(FlLWsI_c0^6}oW215-U=Nf zNGRz9Q*Z7wBQQyK1Iz{vu?65~iOyK|7)N46LclBcrEmLfeWPVQFN~(UJ+D>THrmKA zeT5Ysqxs}p7Fo{v4J?QdMNsFbP5fh-2zvP0$&u~GWhFKEJ3%gp+oN-{>Qb%s_ri@Y zCMm|#WZ|{ij@y@xqHl1M!w!>?mr-np!Gp2%GJJXWP8yeuaqe15i0U2L_u0Zci`KGv zeD}G2ozMPyLqzJd`ooK2PTg`8<|ZqkC_pqM+bnj$B! 
zb&=2(p%oDr>Sd=YWvG+wHJ+a}8wM0pB(3DIVX zLhH0Qu8L-mhToRft|fH}P(BF9_m|Qvt z9MT059AA_{?6*0Sz#)*?;vE9)FsTuh=8Dk8d+qeG3y+zHRa^i|%3O-81PhN5EuX;WgA|GIjN%92ZaNV`rs?AbcNl zKCO6~0Vw{rtfFxd8ldB4LN*lT4IPe9s<*xyyc~8Tb;#X||Gr%(Ex*I*cR-1XLk2*O zJwzb`-6=w3o~cefRG+P&H$14RrH;h$M4(Ko=E=(iTTwsT`pGBVvJL{$$9(DUfLhur zY1fyid#CzyvnUKiT3jaP>mxWEObt=)5i&8P`T6Kdx#Yh0QQ2p#qdORR4YJ*%8cqaA zCFV_2J}=ca-!wLYarT0_0zc~7X!IjQUYFWWY^S`8owgeZ0L7VvsAfmdSgmI7u$YrN zqkF$dj2!{#SNF5Xp124CC-D$(obaV(yu&qs<5}DZRsS68`S?U^AVr~Qap1jbf?>K* z!D$I+#K!}s!_*FiW!u~(`kgofXve84_wStwwXIX?8q(80+Z$C>?l#u0 zUZ%aBn#8@8H$fLBp>xY20v>vm@(*5oBXDspEWArLQJaTqTg%SpY020*>+9b{yax*- zRLH8-kN7oW`HEnlb$cnd4JPl^7^eM=T}!FMB$xE}iTD18+waN)ncVVy=bQ@X!nVh9 zmg@I=cIH&NHI`9nBQKGGb%Qh^2ZbLqiVBV;NT-;kL?@e=7)2{T#3Y;`dwoDikj*B* zerW2V9U-XmV|a%NbtNsiEerhV?uEYjO+s`v8CS#nNk9d*XgRa$lJ|CV?oEC zVO^eML`zn$O=qmQyieW6n(?2OOIXN<_?=B#EI7NA2X2Gs#k%&P1c>1x^LZQv&xy9q zB=)iwkHRxW%^`Y1`eT~xs`u>pOM_MUcn|2kq4-R!@dI2c#x>JeVV!!Mn=Rz7>nig= zF|OakAhjd6k+GEz^lOI1L)PxZ&1?zbyMOd}>*DpZ;0nu*jLAR0;>BzByRW2cVmcfP%$JtJG>`hLgdC$Um`a`iI|5_4Ix0Uhh6u1H5TT>@#3?|i|e6w!@cZh z*>9#RC^O*N)Sj`q2R(v8y(e?ZUl0?AXb!xlt`?4cst%WSuph}BoG#V7GPuDr=t}GY zZg55(mewGoz2TfVxtYF}MX-?dGk3kJx(av1v@hiN&?~^5d36e)^#}DOXl(J4P7u+A zbi8Zo&y7-vPznRiJQ=DpflAd79&67)Qkc;zNKv@S zVGfxUtC1$C4Ousdfcmw!`u@1+>EUI~roNfr%S{t*)U?o>AB|Y0&dWDYv&9-I=%%19 zbn+q{@#V@Ax3=6;2Hb?B5%u7n^rX(TeeerK-&E?h9!{nG{4$ecCk#pn#KUI@ff4+V zo+F@srWc(iE=P>RKshf!T^*+2ns3z0rnZNwpYv#`w4|sYwOr>#wnMo5RPuhNZWa$< zXSP828N%Jt{U)B!JfgIgedRCMe!E>!%K6F zXYr+J9bzjY)yDG|1ch}7@3>Ny5NBUsoFC)ckavt8%6c~J@K`()nOa&GpG;Q5`%sKv zYrVzj-iS2o(96&`o8WonPo7*|&*~fNQ<$Va3ng*gB9A2McW$HlmKq_Om}N1hE#8ZCVxI3@ ze(2?_g)7poS))FId+tI+2e!l!%CI@Gg!HleUjgGC4L5pycFkHRvc%o8@)A=^@2yfo zNfm4W^QhrW0o;PPgNwk>k5rfTaUKil6fD{^8f18Kvg@n5{Gcz%7w~F#Q=l3``9i2NuKvw? 
zgKVFJzDhiDz}fawm%U4QV!a%DOF($9)HIzp{6@pW50uW1pLyb*9(XY)Z6Nm?uy;~r z4JtxtKDX)<85K1{1mK2j zA8IWu(An}D-QD=+1kL8%dM0hl#Gmok8ZFR{Zz@G<=*1~?~DzL$hkN7HN+?Ot=qu8Kc8*mAkI zSxH3Q>E#(G(poP%5yEb)20z|LJhh@GtOQpx;x-X$kZ2(tZyvZky{fsYV`e_m!FDO} zc~aj-Y2(=DI}A#sxhMM?);rY?tGK!=+nB$_Sg3b;_e;Uy#fXB$ny#BG{M#PXa8Uy% zOK^ONJl^Z%27r<^56h{0PnNRgR>x8hg2W%zc#i7O zY?(RFk@WY$k)vrCkFyuE>KJHc6!N_BrzNENNXi>+kX{ro@d)T%5{5@SnS<3~tslIR zH>@-qHh6w#+b6Uwxooi6dwBK6P>b`WC#Uz309*kl(F0~0UM^++o+)rYUahZhZBA_a zMQ2O9T#Z6|vG0yiJ&$$6t%AwNfITq@-;S~9Zl4#^7+92Vn22My@wWCf56oW=ULozu zax>nc&VqQi<_wEBI;2)KnBk{!d*zvM&MJ?vIW?jh{py`}Mm+R#o?%ZU3okjR+LC#MHQ31jc7k3vm)aJb_xfNWYtH?pHa(AJ>z7cZ)6EZA+;g4U zINQyTc#JJ41jg$`9A80k6ZP%X>qA^;rmF%H?M&a!42Npu>(M2ut)&_rJUl#sBVmy} zOBn|`ZPe-KBgAB{fZmT&4P#-Ffr(h7@DzUo*mFA*#pUC4qL(t_GddNh-yLJrQy8+Z zQ>t!a55jYCAn1krh*tH;m)+N9ZoZ@YztZS#CU2VokrMpW(H8+CI}v%Wk&L!maOmV3 zD6|UeC04Ys^|&GbgiOxyq3`wkKJM<80kUylt{NvT4;ty>NVddl&qG}+IqsJAmCY|% zD;5(~xtMt33#U>a0GhrnYVX7<>uUV^jdgIjJxd%5#O48ykry|geyRxB(FnAZI{y&x z8Wi&D&UwB}J+&MKrZHwEx)R_-=O<)YoPWXRkgLMOP1l}QM;R3i{TP<5%6MFFSnw8L z1HDD@w;bl7suAii>Zyn%#Bt(e_ds8`f9IrHc-8i0)7QW~X>?<&Wo_2WId`oyS~UOx zMMJ(yG{LKQx8P7oM)qOXYdpHuA-0i({n8+Xh=;E))DDjO-~6gv_$6kr8lAQzfDj@} zO;k6IWr)ka#=S}Y(mf~C$UH%p?^{Mt^3}TX(&!2wu#^H}H4>sKHvx<3x{}N08n3nF zebH9sv)7XMXRg4uwvsbrsuNtS+Tw576oIR9FzTC&$UOoW5SpQ`2e#c~f^NiDE7ln& zwnt08Vhwm)R3z>zCkfXVLSK1Cu~;6Xn|b|TmgE_arvLAe0sb!GSL=sZ)ciAo+q991 zTH~WTf>kqug6H#k^ZMdhmGYKY_^o7lBAHQ|R6KtRmWkQvM`c1-s5T-y5`>M&nnP5t zb(Uc!>JM{f&t2A!cbT<$sV-XNx2oIL&a_M$h5q>(a2HB+n4jvH%5zO?lF<5?GGlPub+y}h`))(7G9NH$2fUCtI9H-bMB*aQ z+-#Pi*a0q6md;XsHD?Y0v4HCu^{A0`KY|zUx~Wc)#x0_tFGGqcWzUDpKbt6b#9NN~ zGe~oe>?+A}oMt@5e*Foq0E_P4(pZkTx#wluFMR6JovRG@yh9pUVJmiOexvRWK{>x3 zb7&AqDN59B!4a9vT8#Ej2j6n%wTcPZPH{2qqv#J)z#$T|Jzd~BNDqXSco!R#qI1fn z$0ne)uKnZYR(Xm`iG`1>t#VzhTFV#O(QD%*GsK5Z9^wXx7Y@e<-6G_&g!o?PwfQK1 zhSwQ3z4X4A&)=)|*KUL6!{&Q-uir|#K1OzdsZO2LD^WYs!+y?2@cdNJ$*=*P5hzt|o!F^%u=bo+;(^Iq1>I`5 zQ1!25h{`uPA>C@5ism&IfQwT{Ko$Hd6&Z|NyMSwY6!Ww%g1(QOP8SiS9FkwBqK`lB 
z;%m5EyovO)MzK>TrCnC&kW(sW^QW0S*k7B3+uI$Mz#J~6wMfW3)MI3D|uxjyKe1RCu6sRP~l!W2)!lPMyKZ=EG{a zHDq(|2f7#;1nTZb5kVMKI+(+l8|K<~@{6|yE+VO_DqJD?j!g(wsJbd!ES2`fpu z#(^T!=eeu-kuBklgxfRdz})Pmy5=mn4QfMJKix=v4!Sg}drUUv`;#fU^G($AGXZkR zpYFVR9`>VN?3U`bq)HA zPf51i{Q?Mz4C#%0(gEQtHf1U}Ga(J-TPpY!Ph>O16s3_8d;g;Ue25FDqT5r2?W$L| z>nq->w&z&Mx1w%NCs`gggQ>eXXSKQy)+}(SjD~M;vvAdmCUOt=KX_!$G+gFSf3Dv$ zC?xJt*dLs5d*-<9IG)89y`bkdHGksT8!>=5otkDVSFcT7%3olbR$e!2b z=q6D=`V2D?Lzwt|<}hr*fgmCQ3U-<96iIR`Io%X~`AL)K*BsN`!t83UB2fhXN>lOP z6%muT!;%k=J-s+DG+FmlsJh@Dx5{chR0e(J(c(A)-sEjSm?-mFl(8itq(3J=ta$r} z72Zf+k5_`td(Ox;#=5%hPHo0J?B^|=g2L=PV_)iE*HZZoo_*Hst$-8U+`}z(A#?_3 zV!dcDG<29tWjUIKXN>bO(+yYiVqLEHiTY3?cKwDn7o*zIx}h`{@8xz)<5;kM&1mNw_nyo#grb z!@zT*0=A>g|0;ie2UNHB)zKq{j8aGQ&SA@`EGnsvPi;>WIc!%u% z^xJfk$8~XefdpNbf?VD^m# zaDi>Z!%zFIs@x&sNs}*&@WMvtnq^>4q(8k)aR5Y=QTMfo+k$KsC<43046~@Jh8m18 z%mGz=4c~D)@b4C%?v=DB-`*M-MxFBAAUxl7W9c?yPHHUzKigSeNlhkg?u=r*qGc|9 zy~$_P&_R=-S`cLW(Tsa;T)onJW3MB^O-}~~7D?s#6nqz^J9pzyN3D_0##HK8Lku*q z0qjJ6W=v+Us%+jy2$6HJI#NWjvLMOpr`GeeM>HzBn=iWLw);XN%4YIq$Rt<{ zt_hI7pHw4xscvLho?V&%Sr3kabX2=0ZT4pGIJDX0X01VZNjW(Dkn79K-Vt}E{h*cD z4cs9jVwM`E+$Vxc-C)%fFVpMhw^KEI*bo^(oCpugP4a$Wk}Pacfq!}7Eat$|8{tcq z+(IqkNK0fc9D3>Xh3+eFsfP0<=8M1Bp z-=OCpoDn6%jt7`E=Al3Ejbw!S$IkSZ?WdLCA5e&vfUDx#I!Xq`f^kD21d+lY`~eC;x_J=j^LYe>=-fIw|vT`Td;x>1LZRdECmL|82ki zj~Crvi~lIpdR+m_sg*C1nOu60pwquDIFCF}z1rKP{cQJ<#S+d*N0h?&J6y{e!zO1R zP3Ioz9MchQMo1*(#$nNe2fPUB4g#cOiyhS;7PhGU)*rh#k#-=M#4V)gQzYZSANEd8Vce78OPA4|3XL>eQ zGe{L*U|8_2MRCBwkTRdtr?~wlBpmk^8rKGz#Z#=#eUq~Mwps(i$VD>8IsNB_{>KIX z^PLX+=lA{^MTcDD&_xtTf$)>KcFDX6Kd#<{R6}ew|VW?R=Ny_WaYy7~bhalfJx`hL;L1-ogeA*8M z0La@@Z(!Df>P3lZzP$7)3Xt zX#~R$lu)KV;T3LsTzw!J(}(0AxpzR^_Vh#g(y=iGpd%bbjy8d3U1yKfduOsmMAz#8pQx1{t0r4{Ad`i zb&JT^;j8$=!O_#~aX!2^>PfZ7wr8}7(C{tQsN87X*x#hbWac*!kIf0WWrSIbmLq@q zPg9S3N@<>?+=k%ro1YA(1#y0-Cvq(94<9(L^OH3Qaed2yHDwX9eaixgzfK;h@RtQu z?dG;+t`GBNN)H$c*VeSI-s>0i7G-J3qbVU*+z8bWVCUNOZXk>}&xKA2e2|=$Zt0kv z>^W?yln$N>t`yJ8<&KxTqWi#gjJSxaFC)lqc_8fOsXWy%?XZ%7cI_5^JO1LZ;;Oqk 
zPnupmm%qX|==9^+Yys%gyOTVWH(cpZ)c7XZcZSyS|PkOoD^)_Y6#A4Z*EmG_&Qhyj5+cU^t1jU{{e<)Y7zfD{KtRh=S z4TCGnEN)~EgLz)lDoCkD^H{iGc-pFQkW`wW?JeEOiWo+=2d;mC{+pHdlwEgo%bcQY zBRWP29%%JC_u?Jh`$jmJyu7DsqZDU^M6C^MWkpZ(W0av~kzyU(!_O<%=ycX_bhDbu zrd+hCZ1e?&A*v8X?QgR6>`@C*Iff<@@%Y+2)}6TSxZ6fty*7CF*qU(-ESeeP(w>gw zr>c;P*elS34X1r+3QsM3V4m~8D36a~%0X9Boh6?r@Osv(#T4fdm;qz`W~C!@EVhkU z4k-1#LJ7TlcTBn+1G=AH!DYpc=(1?eOq;$O{z%LdTviQnD}!(o-4dyxTT!w=&(dRV zY_zE8-rfBEW_&Kooc1L~dQ}bsI?>Zi(YnnRCxDX4J57SNVl&gr9bq*7XZ%hv%jTzt z6XJF$Bn>n9v}BT@a^bZUdxg*|@Uq7f_LYn# znIG&PF|qZr`OFn{fyM1=v*s$Pz`ek?hj!Z=9^ZSmb z9;ez;;nq)f!ogOS%r6o8WGJBq+vHd^*udh4l%W6xNOS8+xgE#pXNg>0Z#>U330?$1 zSBbV-D#Iom+CQ?josrl=L7fo^Z_&OJ_D(UTEt8Wd65Q)x#F*gn{>l};Er{Gn<=sT= z$xdHMd@)}a@gP;*Nor0bJ{E{FP*f5Mdtr^;dyoOK>@Q2UNxuC)lTNaUEI|A@02Wh+ zyM8xl>zZzDVB46e`2I-qb+Ra_@bMWw->>#d(>aYdOj5@&rK4UTeqCnDWhy<{t%e|RbF%Fa8<|qonZU5zQ`zf4%)?u4^+2(AHcmkVe6{`paqB+z ze*2rG?T{{SE~h1AHy89(o>6E&y1RTcU`*(bGA0jlzI$3xsoSP%W%p5S%1u&RyJ94p zu`FwI%O!#1V2dAf44JARce?g}3h-Sc0d=P`h9ia4mW$%=;p;fWntKD^0Q$myRKUO-ftK6k-g@oPd}i0Am`Tb0Q`gw@HrUNHa?@7XMg z4Hq_QsD`a;&AZRwclfPW{HE1Rg>MY}2sPPJiA-{UQzy%#w^?}Uekr{p?DUD9CwNe= zEKsjhxsq*7viWB!T9a!L)i1Qq#|7omt0q_qY@g?fJ>@u$;MnYWFaY1wgI#UxCCuvQ zR^2RKzNpZh6wNjCMfC;gyj5%1=3S3h^_AT_*qy-x4umi{sDWxuG#FDVd9{+CdI>bU zdSlR-y<5(NUE#24L{Q4>1zQTJbQ2s?!qm&}YF16XLa@!+S{W>0{=X>|+8r-Q01 z{RL78yNMT#ze_G5d7Km>VAwa^L0CY9SS-5Gx{}TIJFmd2`|!g!eZuojE=dsRRLpsh zHtX^Ctwek?7M+5GOe2_q!eay?Mw=@MSkCRLYE3BdqrVvGmF-@y#dv$WlIgcxzRQvr z8jD_r2twpGs2pUct-=gZ^3?T47h{_Qkrx5Azdn1de32fS+oAW1V>oA$9!8yPjeNyX z4p&rEP`vliYyF&8D#?igJG4UBiWZ9iit?E}lN7rX~$q&iUUAq2YKMIM!$ zmHxRWFg>BJ>hulvKyGD<&3br3%E%OZx0bbICu|j5(yd6be2oOw5=XB#LYEl-docbJ zQ`X}xN(0B9UDNkG`jt`DikUvrq4eTDn^tLe-DUoos8{>+7#aF*3$cZop`zHd;vLp3A`OGFNRvRz@N``4~ z42CtfE|k4*?#f)cMI8P;>m;#C?-4!vman}-DpZQ~*ojn54k5~8nGYu!6uyl*qDh-kn0mMx4Z-~-raORP=`(Z0}( z5?-VtHJE%4R~U5G061@v{p>~SFY?TN*pF7Nbyhz7G!Y9;eg^BTy_VzAYFO>891Uhi zSYXjBPPh`hI1c#6#_l^mi=eHHGf}+@P9cgFE%%guz!BS)#PR8i={DnyS$zTA^4lEI 
z?Lc4m1mzW!A*P(M6`rJsxQCl4dSFvj+4$b&micY_TpcqL1qEBR5cRO}!&P(n*Ka*f ztuq$e1FLU`L_K*8k*1y|mlD+sW~l1Fi`kx8bqGwduWFlD7J4v|4KEg$1}X=YufDq` zrjcehndj@9?SSy1plF}QD9U6lTuCfKgLT{gjzcFx3AYaDXdm3zbv2?wut7R|gfe}EE{x>0%h~hXxagjOt@@Dq#JNbnToAgeX zVYqiQr>?B9u^t9;o2tp-!#rMIIuCY01f7%~1)U;K3~VqU1$HfA05*t#i2#JYbJrPm zrD28R<%CznE$Z{-ey&S1O*z(Q<;M|f0EQgjcGCQ?VX0(LxIP9e$~}R(PtJ3g3ux)s z52QCSZ_(@*JKU0#4{0)Aa-V+h>?r_+kPRKlMSwB;2cVAultMritpO}wn5*)qmdDB* z&g4hqW|LZ6EB&Mz`vd7iY^<6#`^-(tiv}M-ULQ;LLn;xGaimW28ulQim%SVA;f8K) zEwL-OqWJKsp6d9Ou&Y((a)I3nUl~*7s#T1%S)+E-61t5!>7*9a3DaVWQ%Z#}n&(81u1-gey5WCP#&IfXa ziphmr6Kl+aNL?cPPd1M{@HkuM}- zqF^@(kP56KxSuA_QD~HNH|t?(?VjpHkV0Nw*GqyDI4j;a?J#-(5F+@Nkef?THuP^BY`oPYxTB&@EHZ zR5Oa@H`FPJl3&480)p8q>}kdNp7z7GPOYSFL&ikU`_3OYHCk&^*v`uvYYdCNPzX6L z4gnC3Q=umnNZxOqc>^w*e}S;FiM@gdeNER((+@`jCA-6BpAUue`Xy~Y5ParMHoR-r zb8FrkB)T?*zfCbH0aL|h(9JDoP6sL4wW_9@Ep5d+b~D{dd#*0GXvaQxQala76BO0z z<2Tw1jcpU-1oqcbK8J*yXKL$7;R<99W+DTwwWUMD=qQ=ANEVL{OxQ zyUoWj(OxSYikTWou_jXyPQ=_&qP$`AEKM+?VaV|uzd*sYumyqS%B2{X^!DcS2{_~J zrknePE54a(m*hAZel?{~*U9&YdBC~;l%}vkmB<*H-METvNX#b3E;<#OwYR5iIL2Z{ z>+E4UCTl)4lnM@soyDhGtG|*&Md`lQx-S{Iq_e2e8-15N2Mm26U6Jc_? 
z7;un1g6F=`_PI65dDW?uAur`>OETZfXyov;t0Y0kl?Y8$&)|$$beu1rdkFV&?1j@` zk%)!Lnzm>5pOH66@^o0%D$u(B;a)_)P8>gJ@4tx;sb++pKJwr0>P(^(L`@8i#zb+U z-zn}$)mn(QHm;^bjKMl8T`!O7>1$AY^(C3m7f^l;(O`jILE3x)2Y!eG`NobD%H?e) z8YgZAx90_)Mvi;6vqA3PJ*`>q;HrMNzJhcjQcha|Qf&C1s6;&2D#|!G(g#Fx$s5F3T;hH>E`tt+;znGB0~<{Iy4qZx@tl&^ zEVgm(Vc}T9w=ZdRNh$VW^1Z_b+{*Xw4Dj7sG`*^1XzY?`rb~5ii>4XDErxXDif3yb0EVzod zE&|fh@0}W7Gx*}`o?8!+{FSj!ra~{isJ7P~a`t+0aS{~Cj6bMfi|`=*I4KCFD2n|Y z(9l`N z-E=a$JA{fkL#WDz`J@;T=vA|0Jm#kMl4c?u!uPjgcXaBLxMdPLeD6(?KL|M zyKW|4YFa^+EBI^WB|d6|ihB%2B~l(lBFeoN5PYtOMRG(1PGMo#n z55MUV^y>w#UD4OZNugar6KIy+%j*lH2o@Q~YT$S%i3R!xa)Tn%EB?3kZ(}bE4V*x6 z-KVe?XJN&HfHb(4Hiq)6^V90QcXsxsRq5mEvh+|dk>un;Z~dt9Xyp38D4+h#%h_=N z07izysrc3)ricQj&KZpfhuQ{l2Qf*-o&mux=ESD=Y~^t+ad9r}_sE5=aRW55xo+bJ ze&Hzo{2*zJ(`P5s@bF7%W|6^HIeoH?hWYMz7J!aDNLvfPP-kfq#fk*4-U1C#i{UV1 zjPThUdIrUXvg$Xn%aC!z znY=Y(o;e~UuyRACz)##Sn9pGN+j}YgyBsCIrs5022RMKeqo3oQ|M~EhWfkGcioJvF5fRO!*SlKinz6i+)oZCn-a=7sF9wL{uL#9HyopMPv>5%xrZ7>HH`^Co{9;U~Zs>(uSuA5%`o zDUh^7T8vrPF}!zOx?5MLB+iW%8fNQ*$Wt_yh7j3gh!kyh9WwW+zOIIh^_5ut>Ka^_pyPB>g!TpwuS-NTNmyIfRCx92&#J}`9;>aYUyYGl zw+s>Tl2Wn8#OZ_W{((({Z1oayN+SXn@GUE5QJU+1RXrUl5`zuuA3O{EPWLJ7o9T#| zeku6e@Ei@Vdpshnyg!YT=SV0p|Iv!g;999qzIQ5B)m*N(61#3{k;{_ip1k(^<*Dlm`x25vntn;0TpU$ItRH87i+*}0or8*LP;xsJIxjGm!F8opK=2>=gW~ngw_^mv?Dr17prT8xza&3PYnD2;xZQ zAWi^VhX}>U*R2VS4-bK3yDWqagK5GE+cgl8J18%;sJp33ImY=QB@hrgOONqA`T_?Xm_+9Xu zR%P}*An0HOFG9dz(~9G&zai<#H#&>ZjWW%kKgCPs>3KvOHNmOHfd>MO(<6L_|D)FhAeV?3te)wjp zaxfiXYJUh9#6hOeSo4X^LvCyu((?%Mw-JuhUpv~_6ump+AAZjIl*#ZV`NrjAXMQg! 
zPK*6I1mJW?m^GX|uP_GlqIVjo-BIY5WbN=6yF8{_{I{=Ec(W;o?M_x| zs^{a>FLvXuLMNG2zO4gBgeCcLBsL1=Jl(6fw&$48m^ZV;Y&Pj&;9?#?TF4ZfIOQz9 zCcJr1duQ*@1%@xmAlk^f7(pN%ur7-!iviTh0#&jc@+1Ql36~0gbk8#aUzw(uWpHh@ zmd#0YR17xer2TszMerHq-w@d9Xw1Hd)KHMGGZQi=wsuQgE?$u{P#tFPdWT&gM2$feWb;bGRPKG`(`rvn017j~wN9Ek1bNuzcthN6cQ^|J>qgYXq zCzZXtwAm6i8atU|(2;YRCg*&tdAQ+wXs%h9ydbk`z00^M9}At&zJFGGu0#awZ?P;- zQX^VC3F_6&;D|Wbno0?R^eeq7TUzxKPJ7bFI{-08$^F&)teBEX+1{tpSg2k!*b*YnKq=ndTYy!6sMByg%=JAjpUB z@PNvbM@l9WDK-&0EiB*wOZE&Eb)LK{pnpxYwWFVmMUO-7 z-XPoub~!EF6#hD)0ca$B2wDTK7e0B;&}4f@1M7boXT#{5j)OX^iHv>ptcUqmm=~g< zcYGV-dX@ap?Rn8O@OQ{10yrI^frHzb#MIw5v0zkTd3YBkr9hVlsYo zG79%E>*o}&t!u~Ip;F(@gm!hFHRGz4iQrEw*Jh_A2I9xBbJKss1I@K`I_S$MrV75} zd&Cv250#gB%MJrKBW32ISp5EorER;clCRWN7Guw_xCjKaQ)WMOWCZ_Yd@M;>Idjhj zZGce5Mcp1ov#LXZESwpJjfg~(<~e)|<;`THZFjtkeMRBK{lf6A7e<43w@k6odcrAW zS<+Wm{&B-!^{e`Bx6wjsE_K}t=f;SY_-%bIce;E&*}l6$YP!5mp_VLXdfk4m z{tQGPlKvX4d+;}3oE`>k_lZUMD0BIC8YOS(p-g(`=j(j3|B$d3UU%9*nduHLSCEPe zNmmc1WB3@Tf5P@(Dqwklqu~QGlz*o~{lBA^U|YkjUR$TBdasna-ugdSuEQTqeE62N z;$SFQD_7klKOtXRWDtL@6%^Hf9pOZQQbUM8>WGQhjr|kijjyvV^AJH%?LBtbtyUPS zZOK0j$larqfkj=dv@pF9NgODo@v$bJp9X{2x=MvRX@i}z7#5O~rm zatE zTh6vO_&F>zZ6ECb>@s%)ozJl?1dVKDfd2*_01W(5#t=B^w>eXhr|pG%NkQ+sIy1p? 
z>!)z45Dsg;JN)i^rXaDY>xe{xHx79G8-gmS=H#O46{^Ox@&~OjSH4-edncN55%bnR zd=a1!u3|^1L)@naEEnwvZ4hhG~l)5QUJfG2Y9Y~<&OS;^y2G^63Sk?m?JV*Kd+Qqx{Z(uVRp$AOdO}z**qIwgr5UN$rB!L zxjjL$AXEvz2hj9J#BWH^HcqE}d);h)52@_Yz6;6>q+beGP*LWND@%im5At zo~T~JbacSF`{*)Jqpv_O{AN$jdsxXkhaaAgVG|Fz_CM3-$cm>2>w{l`n}ed!@PkB6myw37Z_&5mis55oMf{w!1kNer{rTDrtln7Bw747uFemm4E@z*+=FiX9(o-dvQ7d z&Pl!k6^d)Z~psqnq{^HmwqO$P~8x|3;%ojJQ&gvAMv@9AG6NLw!EI;m+f zb7L~}+I%WmvFsNvyzpAO3kaONd0VPk=J^C~eS2zfjYKKPi(r~ZdU8^L@&~4owM=19 z>Fej(34tY52-O_1h4Jt9bFQ$hoyewqQT3k2JWD=gytMLRk#j`Fsz)*(7hiroAU-VZ%-I{tSCRi}3Aj*e!99Xy0R%~yt&4T| z){&N?0O>BH>}8#1AxDs!y-FZ%5hM$ErR(V1VDWAB5ak=e6et=Dr!S&Mw z4_gb-XUr?XTGmg^&)naA5F#~GF_W13zU|Q6SkP9f|G^P@znvIG(?d=HK}^jW&mV;j z_jq(-*Ez4LtU8U7ekR3?aUEy1^_hwXv*^Kw)zf52Ux^!-I*Tpzkf5%}R@aHGP;H>D zS%s^s%Z=RGH%8eTX4z|&lZ{37zkT0+4!RK2c%rfyl(+z9hVc1;&q4%0k7%G10=Q=_ zVo6)COmBMB{w%j{$`?`i@lH!Bv!|Nj=Wh}SShp#F&v+luv@HLg|5M~v1aJzd{S(SH z1P{3b$Q=`dv=ds_^N4Pa+8;3H^FhV9TS2N=i;I$l=E+LT2LoTm_%)x!z2qoEIFuo$ z7r`wYaiJN^-~X654Cuf_o-h7dNecHD5O`wxrCT*JK%a3aT1sEDstFqEGu znJBpl9aglp;QS6f9f~Wz`u2v)myYHvGs8|}=@L3KC0vJ9j>T2ioJw=bV_!69cP0d-4b$x4zb3p*b#ykINyQ(*ZSt@~)C4;l-g)~9}%hnXAR z*8U5W((Unf6;?p1bU)ZUkIX zhuq1nhL6f3RQi)jgdxQ*;x_5kY=Eu=F!Yoix!w8^E+`?}RLzfE&3Iz7M zLY0zyQw6~nlmIw7}Y?qE^S*4MqMyd{L?MMhZQ|7PAd$#A zl$%snvP;b_n2G9Ax#Uaed$#7Qe0Six4L|;?DZ_^PjCdL;RbTOwiP4zUg+V(HLfIxq zMjb?#{F78lPI*|TJhXmc=H91YlWOLXL5cwiuP#vP13lB%e#m5-lqt4dp0^2$UFEXt z&!C{($U&Bbyg?BQ2ERY$G|x-q+=0VC7R|$(F{*y%0gQ(-_e<;R7AvpzGcvqANkVE& z0=JFiO$Z;GAby>}YMakXyJCAO%D8gRiCK@Z~U{K0E!=yH^STJkuk5tjMjv z=`;ytRBko>6~qB6NIRNqHjupU6H|Kr?TrSZEKWXyy>YfGxxP%~Ov$EQ280bc&PC-u z$v%R)2n=aU*M|5Nq}M)`Q=bZ!=`OJJ$k82ZzcKM5pYH{Gss4x-MW6Oz9YS6sP_`P9 zBo}|3sNpOyLiM|^Mt2VMzg!{6LR~5?8Wra~>(;!S#uaj~h zuOFa6aFs=`BP<3`XInKErDlKC8AaJfIo)X&f&Y}sZF?FNknb(|JAL!HIPxeeUJHXx z{HH^>CJ45+xp;K6yXLqb>qwdkfzW>lxcly zScSm)G@&jeC)kOYh;@(}7MICgB)OA(4Y#GPEn3~Hh>v;_2QEE}p`r#NJ?iR(3O~jp z0W2&*h)(3z9s06&8l*H3uhsSce4+jhlKSU@`ak-NE`*X5yyJWb2iV_nM?g04#Dmvs z5pM^6;H)VcAErk_bZ>{>l#jI9^^+T9oly>)`ejN 
zqSqXM^k#oMcP;Sq=YjqYHz)?%;l{%Tnk*V?h)_UYjQX2b&PW6j|D!dcoCe%D^DGD_ z6MLH4t@~K0`;t7%B$ZwwU%1N|x38 z4Wvl>3oKBtZtW^PTru^uN!@Eso>`wUUWtj3ijfIcy%zXcad86J$V<>{^{9X%<{`J1 zM7&3c?4Zy=sO+qIkDBHEWt&>tZ`r0&w(>~=is!E#BVB4YBA;?{tQ5!}?*f2UD0Yf0 zHb&E#J<%L(+kzcmp$&ocz_w{lA_X+7AVfMTGrj(NlLmmyez$vb5_6R{ za~e8taH)QzM-MMwOEDNUWYceo))KI}i#%RLfB{}GsDFczeESZOv6pQ@!^w;ROTlYj ziprgnP-!;f#e;I3&}k0!|~@-z0n@O z66Q*7G)3Lx)*&UR1;2aw`oo_spp#m)VA zI0Z7_dS5}9PtCJl@^dV12swZDw~u(y6R=))8bZN6fBNT!38?0amO2QWVo)6pX+eBxzd}($DCYqL zK;8~{g{U>CU0%Y~{op~3v;E#eLE3zoueFo2nV+9n$vIVpU}LT)S1i#GTz4x7*=*%@ z8aR2c0ByDfFLJu1IkY5+8m_#Iu-clSq;Xikm*ZTW2*{c^_n|Cpl5ILsGIlG8;j(DM z|9&1rCP%$*!~aph!XKdBv_wR{rcFRph>Nj+=pn+|h4qXz{MT>-T{!9KXZWf)G&hhB z(IG-FJ`32(=rn1?ld$7goe>S}g z|2N{6jwgXbb`v#bNOt)PbhpkBNFyI{t6WCyvk-Jf?1}}5J40O?oCxEdnf*=8Hk$an z*xop&m6Q9*>)}Ec(DU;(9y{zdC||(Gpggd#{9uH6#Enlk6nExnQqJp^7Qo;Cy16N) zFgzznXm%;~%b`pD3i-w2=M6RlGZf>->=2>p#^c{BpJ8EjPOD(T$uvUThBZrVd1XcZ z2}6RX(*1C?kZ6vP46x^}XP*Ugay(LeF@odxsDx|GTwef{ zGq`1H|Kg_dGTO0auJ;0YW5~+y&pkzt3cugN2!74fZ^@DpJx{WmO`AhhN560z;GHk5 zzdK2Vh@mig{@`Zi6vXw0i01zGOU6N-p$-AXKh5kTpDazDdU;vp5?itVIFlyM+cFvu zEZMn`)kxXE?e;(oI8pTcrk8w$`XQ2@=&Rz`X#3XGhjsV+cx zoMD?vrB!}!{2rH^x8z#4g*V=K*Oa{ycIhHe4b~xsI65hE8#svdub)7OMEMaTzi)1dVsyzOOu9 z%bSVp{x2xr-$@?wJcFHl=oH;6Z^`~Yu-F-P$C<~bGA%i;I)FAqB`?9|xGc-i>)idi z*ekqrggpEtMI-{CS)!?iL?-y{J2OxA;4 zt{sJ2SC2|QR}CRm$XOIA>J_5$phH!m)7-D3JOOT#i4fh8F{w)*XD+h$+S^De7Y(-U z?-YEcN>hXZREG=+O6XN5ci=3}MaV^sDP_EMbqaVN^Jir$lCR0FMJm|c>3Le7+?`Un zC@9Wn|92G-$m$)x#m+PIXa|$ERrn%Mosz-ka)v4Cs!+1HC#zMdjjjySX~FZJoE(!D z%XXx|cEBIt3kATS7UMp1ANx34Vn*u24R+vU_*xcH-S#SP`>8+y{!7t3E;;+ELOR{y zQW)PPa&d90nCGPUSC87@vQ|1Eoh-8_X8>XXspN`v7~DT=mv%q$eos$bT2C##omHHA zveakkYcZz05zV9rgodZUwhLyWP1^x6#_&3_-0;_d<>B=&G9~Ka68e+sPG|T}zWczG z@Nc@rLFd6Q(eJ0 zxhpQwFWozXKI1SfK)%&607x@MoN`>;!naly)r`2;n)ejpS8ZqIt|dM>4wSy7UG7_@ zBLEJJ*dX2e1dx^~I9ydsx7k}V!^i_i#dtg|8(xuU-D=(?+1=Wm5aD%!mofM)j^V40 z9diQ8%Z)z5t>0CbFkktucCsFE^Jj@GQO%|H(b|VYNePo@hUY>Izc5J%;*#C34r6jh 
zna4K>mgvvG$>XX^hGQF+&zLQQriqLcGd6ZCD%20xge$PNaYatCS(?j_1PB^Gr(8-p zjXgEcy%3!k^`Fibtf6MtBxndTOPs0iaCPQNMwj`Q^uO<{d@N?Ms_I_?hCJD zrD7SmMg&>kzB*+COb)*cVg+UYd(;j6-|uKyw4XA=w~cmY{Pj@Eg@`O@W49Jzck?;RBY4r?&~< zj24RKY{{LO-YJRo&&`1pjqTP|HA7YT`&Ispmvwq=`k05n??g8SOA??w2!MiSJR(mj zKrT+}%D)C4pKk>zL)!|=8neb{Rz|h)K`m_Z+@k|`&VJ)exlNTJ17ddI_jPbJQ_Kk0 z8F(+>lE%ltMQn=R3hkyF+(Ygp@?fP%hc$}^HB5Luk6>sJ{; z!IdP3Dktat;T^!4HO?{eFDsR3zf3Nj#apXK8U>4ORR(wz)4k$24{YD}sKP{W|G|=v z1xgSjUEhIoG1wWjyL2;N`iYBFh(Z0?)7}ZZnXz=%U$`=1zcjm1xW)b=22w29q5&a6 zgp@{A>~{XSmc8mXQ)|a6o-YFuu(PhUl&`tSR-XP|vi=i7h&iDb447s)9;V$m0@$zj zEnSayy?ESC#nRE=I_%8+d6!AS?o@|lD+pmhkpV`n0mB*r0pQLF{z*YO^311m=}bkX zF{FG6J>2__ySO!rtsY?Sz61bIVq^d;as3lcdszqj1{hVqr>OE22tw`?jXOfs^oM#6 zyVr`|wC($1)9uLZY}@OW4n<>|ASTeJod#f?pVEfnRdUk>^d%NIji>+X(qZ_sPobA_~KeoOJu8um&_s2X;VS z@HliU8S35ehd&S){T*KO7wFIn6yxg?ezcrng4S)|L58KpC$A^ z8`A$@`{sjQ9ztdOChDwxgat+u53AVCI<_V1Hhmn=mt``|{}O1r{i5L8zctqk|2Oh~ zewL96PwIQskyC$xgdkw5(kzvy6xQ|^h_{ui@>|}0|2)n8XCg_8#IV?T5-3nox=eZa zhbl-DDNO?kOkT8ioe$=aHVb6u@3)|t$g%tO9NZB>c2T$17q)ZqtsldlnmI(v)gFTf z>RMDpU|;_NkuT15u7X<{_Md7c)NOXnq8^C7OtOtqS`nrnR)fc#rfkciV5=5qn!F0g6w8hUs0HzG(>$>HmAW!;elRd z9GlgZ>}hYd;)ld>My0*}u#&tGYh4_vj94|e@%iy!2?tW~o0#T;t`*_C%Yw(Wu5lS` zQPNT_kgLL}#w?lFj3){E9M7@+!$u@5`vSvbQpQOy!iI8z8bSajdMJ`Lho~G_+sM*| zmhZ>joe7hkDrytHpDzBC7;k>v%yj!uC6IZ&SC$A)Zsh=>25boT71ajA^K8e$@3z}n zuorz%Dt6~B7Tkt*GX}sbBFC`3`eZzztRD(!tAK%T8G|BRsMjzQy9T+!l`-r6d<*8> zv8H;E*FH#(q!9uTOw#Q~kN>i~*t4RIrTcnTwI653u&0_61I&ICK1L3yFkd_uoTAKQ&s zKn&wEBmg7W-6892iCnEAHzDK# zv{`nLBOXPM@+mUjlF5I71290JHS6&|q~VX6eh0RiDSq`H@!@J55HTA=)!HY&Y=NL7 zw-i7#1Pt$?M@mFUVxTRz;>s{vB}w71N(Wd0-y~+JYA+~5Y57q8x%Kp@#e}wMG;dy- zkqIRo&4k)42YPGeIJ7S+21!qVVq8Q9WnC8#x2q<6gMK}oRFUXBR1)VOFnfXHp6U!% z3G}p@dq4i;1edBkTq7A643ZTb`w^lKK99!J;Wm=cBDz6@+ne5b<@pd1L-){0C#M3G zl*`_Q!>e#OJfLfXMI?%QiMqx7#$8scb`_@F_&>SOy z?b?W7#%rzF=09TUy)DIbB7UkYe?%*!)>RNZaK_OOf$2<(!)__i(+wJYzzFZK;`syeXG#C}RHQH?q%vTq9T3@Fe8eE0v{=6~<3;19v zIg#Sv6AHTfKzxjmSX4%|1A-e=D0irWCy5W;lFb_=Jnc^r{ 
zQy%ZSRH_}bzOCTbIJSjcc}^=~{ zJye7AO~vi$9~6N8X#cZA?!8G&961p1cLRF@aQ0b0?Wm0;^*Yhe9-rF_vRMlI@a>Ru zoFj~^Ud54TP;%S|nyXhF!AZ_~?f9+uGC4&0*=k!! z@S=9qQEr()QBVk7mi0BAh-_JAKgqL8;)rK}XU>>%y(AfMC!zLN2=^e;--@}4^>nP~ zJ?HVi$cr|Ixsi2c4oMqd%HJiPe|g30@=#+hTOQRkjBMcwFuoT&&n+W+nq{xijAX1NIp%k@Cvx*$P!iY8b&UQwq2H;XbgPo4Lf6 z1IheCRqeD!jQEIx71E!&aneLnCwlu>GiS}S-BsR@Iu`|Wr2n;_2LcD631V`c#6^r7OT_vg-nI_n*{plj6TFd2 zB5Nz9O%D9>!kdrtoxtj!~fT3 zhC@TrD%pc#f&hqeBrpSND`VjCiJK7_T8HwR3W3y=IVx2=?b1nWn~?tf;;ro>UB$J4 z_SL?z)sq@sQHnF|v+JqE&GZaH8#tj?bS?;9uc7DWwh*B7g=+=+;thLmq4d~C!ddJp zE|8ZSb94(x?;k&T1;{}*1lpsK%zGQ429GXc`#y!FG3$=b)Gq)e&VAA;rdRC0WWBOh zdxFuHLg79_;jQSftme3ti4xQ zlil|A8$f#R0zwdiC{;QF5^NL^Q529GkzN8y5zs*By(1tfz4zWrq^p4R7HWd1lt=;+ zf(fkeS?k+-|JQq6-_hRtEC(EfJoA}zjxp~0clQJ>h&uGkQGe|rMA5vt2DSi@zN+V= z=QIT4LH6Zh4DieVv8@Gg?Co{&=IQ@IU=CQgOy+%& z>~5BIktK%pyQ-OohVb%1nMVfxij~)x#xx=N)|;NMnKW`%i$b8nJ-k@J_B0tqamTX9 z0`8psHG8Z)pZoZ8t7XcQ(vdd77plK}$?B1V424gQ^l!;3X9qG4M3Mo9Wm2D3fEVTl zp?;K|-gehkWl=>S7oe+6yZW7F(`j|c4i7#|Y08NImgXucKK`ykl=jx~O`t8$9}aas zhy@5geHyONmV6u(4lh9th$d)V2ADVbZF;2!m+1_<_admT)h$(}h>g3qvSA;;IC!@dP>o8*}b?gyO2IQ`Kk`a0GDo5jAHei+P z;zvXJuANV8w6YH)W6;iF^*uqG$oL7nAY!hF)tw60tc7c07bU}fzx9DA3fN?9IWbU& z(HS{WL;*bslE>zJI!}&NU5p$Mawi;TE(r78DC+2HXo(ZBCmRiZnxsdUrP(Xo^?z!Y zhy$Aa#z3On3(EDjv&wU>^NAB=vs@*9WN#?=+H{TQA0y@=K67*cVBnHq$y`*ERT($G z&g}bAME-K_kiDnjtK;PTLc9djX(Pmzhoo~7Ut#Nl|003=O3h_8R%wg%&SrYx_S-~J zM}pT|Dd#VcS^MA77t%*Rhc?)PuOdXwnpE=9Pk`U3lW-EL6=dKg?PcJr$H;R9*6?yFBO2-zk|=;+!zW3+^BK+trHs&Sp(+UqEq72zBXKc zf6({`$%sj+nd4lJ2I=1~5OhOrbs5t>afbI|*b`xG{E1rp6(oOa5~o@?#ZNvPPFJj5 zYj-8o(?;K_(n9iW8)tgH)J3O_gupl<=@U2)6`?E_zlc=>?0_Q6lpALsTHYdoc;A7{ zSGBpl7Y^>l@PaZHHhEI!oL=OwQ3Pyz33b6Nn(n52YZ2e!e1 z#XKIs;#acs7e3s(N;Ic7fXf!gevdMgj*nExeA3dXD_+HP!t}o}_5bPJ1Gg0Re@*>= zR?pBJ@)J=6Siu~-3EtV!81&X z$9drl2ycmDXW=RbG^8J}m_;ae4;5BeXXkHJdj0aUTp`;$mvZKb*>x)=ZjpUYo2q{A zI-w*VKx%DQ>z3aJD6Pu_{7ugX=YGpyF?@@mwm!?d<3|VnlKvfm^u@b>)nYBT0dz$> z#t}$>N$5!)%9EUOG_0ply6DlPh7dRz8JM}4B5-A?l*+q~(86`_vg#Aq9ohc?VNbY& 
z0e>nZ_2bJ&{^my>+sGY$o5s}+H>0iZ`G`v^l}vpe(9s{14h;=sxBR<8d^6i~%NZX& z@HaWG$EYHp-fY2l@8<2@qes^dj_TlB{`w0HMNeZCG|ew091b191d;vL$nWERC^EAv z?5}p;E=l%ZAc)VWd?FQ~@BWji0m7qJfJyGiI^Wqxz+WhiIf(ZIGw$VaHl_WgZ#)#d z@!6MF(g2&~jr(j#}T3 z2g(plVyFks@)z~tNI^e9%L2oUGy_{P`e#(jUauNL5Z;zvG|Ad@TBi!h;`6>gsI|S6 z=R%Wn_7RBr=pNZ5=5Vk%KR{}Uux%)tyrx}xW&ZN*#AwD zO@5Hod}}lr=?RS4n{BHBznN+<58n2}S)rqB(T@9@iEB;Lm(mRF#dvOc55^2$x;AM& ze-`B%znl3@4TP?O#+DhLf=sx!1^PKITrKFRjH7*HtQ9B+s4RfVUW9mIy zZD7Jujd(Nj$5>!bgdvlaYu%}PI{3@auSGW%5andY-me}K?UzJ7Doz_ybhQ5%N5TYq z_zCf!+C>CO6SbtVyK%eb1ir}HzKowpr!n`JjW#3jTDSu`%TSP)$)7vrWexiZFJZJF zwR|wT!Tj%OCNB`A3r}&c?Q^q*nRm$xXBBO_gvR!oldunWkRUb`jTbney)xo3%SUoopt0k2!+IqF7cM z!w>RX&VIBK>ZDQ2@|cV-$tDDd0&>8-N|co^yzqgKsL#~edVzvf|6F09klR)J;ox~> zV*~)3@8d*h<3~SzAbYWpvNpHpODCfer|muY<~i>wc2zeu9&Ww5Y&Ir8etkHEo}H{8 zaD-*wht^ZHNM(8I^FS!k!I0p%277D^?#;8hb@5k1`dP$una4fyg7>7A4aw!Aa`T_X z|D2=fpNp?AAGamC zSA8&K_&Dn6_;KRmkwc=Ho~azo4WSO89@@v$o-Y75oA6*LaFasZ28`sTMSEa;On}+> z;mnFrm?lZ+OW$0Ad&fmM+Ajrk==JQGRP*)wg(BE1rCDt}_{dKbxn@)gD-^mq*GDs@A(4uK z192eW;%`0xTM^&%??>?Z;?>?WA0Tt7;g@1wML)2lm?%P14 z#R7$K22Q)p`u%!1c}dF)OI7Qh8ru0V7DFIb7H2hBd`DLqn>~1I{>$k95p4cjyvchy zzXgrgI2n>(i3Ch_6L&`V`{?jLGBB<4&*P1JTY4A0emuV~yHc$&d_m&99;2R<-izU< zp?{@eDX2F3LAX3|_^b?|n!wl*DCU4!OkSqSn%+o27EaYrRdDQK=;eU;<}VPRY=M7) ze7h5nH=ch2-whr^aZ>!CqmzjnpyPT@YtqIblI^#0`^<6td}192ewzkD zHqa!cCzLkb=o3OtkqTi)%hLvE%zpYS_A4_dv$Pt4lV06zpWWq`+Xq3uXsXU{+Hpv+ z=|3`<1Y)j{Ktze)uYA5f4nX(J>ZgP`gJB~nwH0oJ67N+DS5+OYfq6%E!O80^mEy$_ z=$9oadcan8p^p?z;2%cOAXu^Kh#vx7ktx9u;!zY+VBkT&9u&;=lEcjcAc-wc&=#WjlK3l^pHXg;>;h)3R^0B*Og?H&Oq%KqL z3c3Uvg2swypQQqM%#nElb3GQ70fU&V;gw3?z~>9Oa&>=l%@GDHQ#Kb29w@ea7F3g| zqG=fYN>?6v4hfhf7M@j{0|{QUJx?k~hM!;zFIr~Tlr2oqCy(d7SVDi~d@t_OqY8Vf zza@1M^D)00XtB;%mX91HZ?q!!5Ae`Y5n&)vtYSx_h?Mqs%y6!BzPz=$CE$CS`-gDa zo>z=q0v8@f+IM?e|9%X60AAi8Mvz#^Pf4>|qrjqYEGD4_P}~bQRBG9upwbQV){of^ zG^8WL{G@xj!&^S;?=(lg0qsH^^OJ0~VaXf3PzU+=H~j>;c0BpZoNv&JXIpW*Cwg7a za?N(^o+gY@uj;VG6$jdPPG3g_)ivS&@J_*@^jNyK0byMoAJX6+;Lz+9I 
z+|&xaRp9nh*P(09QSYjv*XydPzS8fP{^NZ^{=-^b=_Z)>p=3p9&Fr_dw4b`7k=RH` zzS#>}4!1Xcv!lGNb5FFUPd`_RHM%s>9fJ-#MIBz{fV%%1aQMGJM}#N_P>1mSceCsN z@R@zmI2qXpcm@+uA_!r^2zy+c2sjIa8!#?hl8;*7=75YT@SC)iMZMU*-FYtt4wkAM z6WJqH0U&K8x*PKwFg_PsC*LA$M6F-jM;MeZE1q&A$~GmMtpzS9W~?@TNSkiZU0->4 z&?1n*)64-9fc&BEG}0XHqgzn9rVN)Hvh@31$T+xi%bz9v@*kJ^c$#CNG(?R|E*)%b z0MxB8OJqNLOuk^tSiH1FLC%&*{6Lz-*-k6C>*&k;xW!tgcWT{Mdzp~GPcee&td4SX zvsbBKLv8fz3R?KeH|C3SC=6rb@4K%FF{$tSDyB0B!i=-R-kL8H1tX&>J>-4I-q+!l zb1$THeU2=Ck)(D_Tmzw_pf~1I1U|HAe}muyqr;N)SGe zIx+7GUwwa@!Co-Io$hm?e$v*}tJlM{m`;E}|7P?MbeX()j2e)=GqNCZE&5Koi?Y>V z(HBRFz8@L?0?kapzRHeke#i-s*< zcw_pZ7I~{7jQIoZK6+iKbdC9oqS)sS`l|+1Q4BXkfWjTx$8|ouaWR+&h{cgVD})0^ zUFB@+vf@s_sb zwE+Y2O}h)Mn7|h;Z*Nb56AnAwyXe9k7!Dx`6lGj@i@YbXe-&V?(Tw`tZMYwc-VZ$Q{{n~`fEPOnE zf+29Z_tpsGu;*hPaE2}KSvX){yD(ZYib10L{(joyZ+26w>*H^CmGk_~=8$6{a-6%m z33~SNs_8{jP!J5z@N8}jCj&KEbflK}mNYN|*?T&71+}1Mh?O?@&hPzA^>k!`Ib^sf z_HvXu1z)5-szqC0;MwE6Nf!175bb+ye1C^Ww5qSk6&Yqt8Z~6H`Q#0BS%OZzHW&9% z#S}F{TP-1eRPbzZ2po&vH}vwWcjFvoT3M+#gljKom6oEaq?JVWeQem{wYd>`izq-L zM|n1V4uSnz=fvX@C)x@zF2%C0l@r%fATw(3`MnRGWF~$+wYM`FcVG2X$rYP5X_>C8 zFNl7GvJgbCZJXx!MbKPMQuJ5Xx%Hx8OZ=w${Sa;Mvywg<>+`8D$q0(LPi|t9gY~IBfn@c7 z0)&4uF@+?J{outK&~#vABW}?=w+~>J9?W}eJ)h>+FK^GhRxG9Zxh~+5>elOnI(M)Q zfj^p}@Elzq>!)dR<;%m_o2y?g=;Xb$y8yXsB;V}-at)ZoPw*@jR_0r5_K0UwsSsT8ITDHe}ed>KOOawP4C;P6^ z^q(vAelR)zyx{9Rhia^^DDFjT_f_{ zmcpc$*cf-ns|;TWW9_I?6smEi##7~{$q!9UO?|XQS@wc@Q6k+B|JR~Y6#|ftR65{G z%jcg>NZdr*`92VWhw!a<6NfZ=w?2u@5Nc&S*v`A*=?PI#ejw;z4B_!}Fe>)p)$EP8 z#G^);fgsmQcw|VAO&M^sSS;Uht_%pbYREivEs299))luzxp0h$)1^dby-1_G=+s+; z{(&ZnEPSJZ$!`7}--wp@SUTB*if?f>fr?`zdc+Rktz0QMpSvzp==!4`t^G~N`NOs| zG7Gi6mf)Q5qiS#c3VtKfW26|HXgRCbBI%VIsP&rgbJjPZ7@7$3-e3ymA#rb(hbp6@ zfHC-=mKooh5~{;ohrfSjWtwaCBnA3wrQT1v0 zk1Y%69%7`+Ix8=d>n`0DyO;6DeWd*eV)lWtQ~ddh4ild1Pf}{C%(q}v!Bg5dXHc6d zZ+<3g`2?zbtMTya(JZt13Xr|qJ3G(6g`S956j}(}@ss8=kWJHpK2~N&TWwCpYBK?? 
zOF^eDBnDZi2pjb6PiD1F0Hz~XYi&;)ttYJpzohY-eLpLJP)nFj)9rF|R(y%@x; zSG38=1uY#@ajsd1Du*zZl?c<2eOIarG9RoirO*c7Kc81}7X$AxKcPwX zUwIOp#>cOA{iWKkjI$+R{rS~sM2E^)H}(RE$>?ySS)>aR4JCR2DZpIrbQ=Tb;AU;9 zPSos@#$dT6Cu>Q{XLa_r<*g_k23AFN+E*#;Q!B>kSz-{1)UEbu56XW_K3=wq7A7R17in-nt%}-_qJwzCHKxyS5%{ak!5rJ2cMz< zTLt0np-ytb38#)RsN?aD4*1nQvfuewGYM=>O5{)Q!JJW_wtV^NF7)|(CQ-d-N3=Z@ zV^QX^CG`DjZ0A-n+i7m2DIg|68dYGX&__% z9<^F^bUUCprHokjnR7J+)pQ>};u?Uvaxko%f%{;upvu<++Rn`dvK~&!X4H$jvNOZo zFNJ@x<>$6F`yrdW934-P*Jj7wvL>Y&-y{fqQJuVr~zF9Tu2sD#vDJpG!1Y$#Kk(Gtjs3(r7>-eamqa!3t^R)BE=g;(u zP_BPN0|0K7v9<`wo?tbST(mw~rnB(M*gdGi+Q69&K4am~7Uq?mbA|R}*bujEJ|4Dp zFp!MqAN04qPP$X`rOsu+pKightf{9d`I{-*SCo4c7x@m$y9+Fv9~YSkSN;Ve-x)oD zvIldPsKPjU^z7kh?e$_ZX}@wVd0mZjmSIr(JR!AwTX$A6Tj`qqs@f=^oX6dH`tPQD zfHaUmCbxrGLB{0FqoPXs^2|-+&_8`vl}{j=2c4=;(BPQPdApuhwKg2%uXteJ3TsrI2dP{g~fOoLv zCQ;pp9#K_23TVSk^_lDA|D;Yrpxynl{x{+{1)#kN58jJjFJSM!TA$ftP7r!P8YYAH zpnO1+XWjL&yEYGyIacT4X}i8VgVi*DcQsRtA;8fjiE89&c&e=dew0{+|2a4r0c$>p z(1N>S<+%d}vET>~&wcdQuQs=>c5p4f$~)wh8aA#$_$88NFxyfQ9iTH%QJ)e11eb;X zFeJj*LGZEI6SO%lkEx|ds74|kMCXeZ<<_{E)#Aq$=+Xnkj5WWGcyET$KXD`*Qm2T6eM(rRm~g^pB!|Tw$U?dovX#B!J1&6d^0P(?-s9*LmaO~rh9jwF)!Uwc=nMX zn$bKt0{b9wR)w9Nls|YKZDX_ZH|;irEP?m&dUYvt)|k2Y-K)?m$5MBy2r2BaqMq_C z-9!S|7JmutKd=_OGH5wwW1-dFe*b=Lgg?VcMGfy)&6Q}=pw0{UN0`NJuQ2T!^IAq^ zvw&(Hk{JLM@>09!SAEce2+%7busrjKjZL@w^U-9beNCS{v1Bia(&;MmY z4pu*(0=}Ud&W`*NP*F9?CN=s1H+y_*J~26-YrsFu=IKTIk1GaZ&)=iovfNV&v02*A zpgtSKBA4D09}$d0%kV54EWS52zF-xGee4`rYbop@7aG?_ioC9KFgn?HUwkEPr*=41tD{RPtMi;T|)b zpA0b9%yI>Sz^KxO@)VMMbiiB)UN|>V6i82!2OO%2mPIRH3Sn-7Hu=lbb#*F|opW_1 z4M_{@phLr(Lo5kmk1V|J*t>K7>Qckm<7a+iwKMQp!$ghIP{b4L1?QPs#H|-_?#E28 z;$5e=BlFL*v$z~W+h0-lahx>*KysfaX9Xw+0I=KTS&~yfsEOp-RZVkK_^m2NnxO4? 
z*Eyh3sOR!sz@%b7LDv9>+N>4D+FVf@n{j4N^1N@*lW$SGGn<}qF6&#ryG|~|9V1udiP~pbJ*;F|iUGkt;#|lDt=m}Y z$~EEcPuus!?wume!2=J_`dm;&IBhu6 zFHny3zMUya6B;@kG!FwTUi~*edo2xO4CtW*De=QROS05IE}Z|Vez&#!*yDd|@zz^W zpqc9%L$l9TuVl+Ei>*tKBu22y&3iw-6!o0vJBztFFe$zjJP!r1ugi;r;AU_F#aFxs zsBp*we~rF1tzYZx>&SXEry}%he0)*cAyG)ElHZe>H8N_VgO{kcPaxd?7wGO;q82y7 zYb&p4A#IuDw()50z%$lWgg04X{PZ->qIhX(J9mW!5_e<_CPnw7TH9kwNw4I(9pU)5 z1AoJEU;B3@DPe7Hf7RJ5x7rR>y?(#2&I!Ed@R&!BS=(LJv= zQJ~#f)q1<20AeP;M(?|qy=jbMChZ((0j-IO2QPe05iR$g()2KW_qgB83@e%zbs>MH%Vq7x;-ao_Fo$hOyVl@ToRcs!D zdkbBzz03I}vZ%(w!WUk5ICHjS*=GfO_qVRLYKzFtkAm9K_^rqBdIXUo^h9eerE~a$ zQN=jGE0%WBf7CouVwd}riwa_j&yPlK7UZ*w5x%ubhERlR_e2zXI{W5Yt#V(ydIh;< zsrihP_j(GtzfHZEp6163Lrv~%pnv;r{#*e$2zAMM-Cpdfy1|0O%sV#B&Fp+Dl{g@V zZd{t;Y!m}c9MR-w1OZ{Lz*J~s|1$XoMfxm9>kh&B(?YwgFaZ?~1ZaiZ!gY5I$HqoF z6D{g&4HyE;QV;72EgVHFX{3-(f-kuB$u0&nP@oHxH#L0{b6U{SRkvFsi)4qk#+H?6 zC%K21rSIln0Da&l3{4(hFs|OqBq_Dvtpec&HRrX3(~ACSe`JHj4bm)%x?d>|J#I0U72d( zzo7x<-{nUETpbSyutLzXP5Vuvk;VqWmv+wut=O5|)ChrpZ9i1B#CewZzMLMFPBAfP z?Bv{9z3b3$_NIOe7T3j=O|}e1Yi#WI6C%_}ew$#nJ9U;jkH!}c-MTV<=|}F330Sba zuGADOYM|B;Xs0#{(EyyC2015NYNO!Td^Y9wsdFY$(}@w1?asDJYw7WJFDeUJVPTb0 zcscJ@zQR)()U^xyml3FQCY7yZ9)Ot%CENjm=nvLM2f%EyqFF< zW<3lttfF-nP1u)m-YJNA&x8pNvm0BiWGqRx{8)Nm7fXa)R{W2BQ|>*gY8PbI2r0 zRkF4)_x(8Bb>dt>uaukr8`qYoXJ+@2CH1I%zZNj$TC(rsib=`Dm}nimQ*?=|hB+~4 z-!e)RuDRbRWfUmee>d`r6vH>gS2;lcz{QI>c((5vX>l_-21f5I&lWJ*ugyPe^gG|Z zYF8f4vG6;47Pl00{oSyeDSQbZWpNR)q7OP9CBkR5AxpqMYNTaTfv`gH+g<~bGLSB7 zD|c6LEt4&7s#k7mFP$VUQx_snT`=EwkC)CTPr6`0dA?FkjOrm(y3)1(3&ix@;+M9_ z>;cwdv$-kthhe8(l+IT{=TL#~G_Tm(HT5k|fy9PjiL(YIW-EtSa?XtZL|9#?i;^A# zAJ6L+znfFIi!oU-2vdSN7Kw|;S|wT%M@Jl7wS{^X zme~rCrFVbX9y;DleC&Arw)ow9_n5whKQ}?z{G-iA^MAutYUFuHK1PJ-C>12b-sNxk zwacr|R%1SnUXJ>q8fR43Qw_I|x)lDo7JZdw%NgjhItToTrCcjPyT5@(96a1Zu`dNg zo3>B?jwO#+DR(aUKlfyAQ2;ea<{SfA?%?Lp{)5;VDLMO?jvePhyew^2^P`eSG-xuwxY6@Rh{P?n*D^C9WSo#3b|S z{PXUFr(%Cs9`-8AvzL zvF_)h5xk@)_|xT^`@Bm5y8<5EkBZu0IfnANYcoy{M;;{k38>gOndQBW>h5`$_U*x; 
zL1VCen>w-RJMOXVt%BeoSTlW;w!j{W!PkoxB65*%J<{*$J=_6W&b~b#$Emv*j6~mc zl^++kxP7=x@8i4+{8XP2j(8!A+7K)EfVL39Go&4D9_r=MDlE8U{EEe_-SW*vT)u~i zXo_w`g@J$Jg|M>be@E+rY0s4cst1FWNFOjA$2jAGc#UTQW!ZL5J6C>`G!)l75r>0n zs`kQkuRja|X2GY(5oBZe{x%>5jco>I!R8Ue8!-oBam*uD)$eQTi$hJt4ZELhKa(_l zYAP+p`@BH?IY<~7e>>HO<+JvQlhUz^;S?3&{5Xm^ByD}YrD=TmT~x~LG))MzILP{L zbvNizy)!MaCsnwoEWmJuod=cafk-|>8%#hRfZ*aNn74^LJx`g&SX6SB*CJEPS~7 z9_!DKvY5X7=dOYRhgCz6?vd>y&E(}#V)R>BEWb7%DUdC)VmaWa4k;W{{Wj5T!RqF1 zBh{8tMv~5%tjCA+FrVKx3U?a4GFW*4_j)_{&D|l7D%iqah}*0ia8S+ zK;LW&#gOkdH8h(v-p&5>MP?{HZG7BNNkL}ZPyv$uKiQpBKY@&I9)vpS3np7jm~dxs zArCVB-A*#qUj&y1(g|3#+j_}JHJuRxaZZP6{6aP8?XMMEt?YP}x&1ARcG6q^6LiOR zUhYpthAwqbl2tl$!6kUA#j829{2|~Bbl%MB2fFuYiUI-c-j*jeXDyM~V<~Ywx-tgM zL^-$=*=heg)yqoHQ^=Xa=i{=-P*U{>5ykRj;WtGEyK;tdnxWh&gSliWWvQFS!0vE$ zD_~5YHH(0*gxriC0cfe(0sz@enxaHNMb)&zn3Z3RbRkk{!z5x}K(AhzvBBG?S_ z6ebKja<3>@b8t4ij`H+;rfJ4A`w`5_by4kgrFa_O4T(sB$1C(yjd}CiTSW5NM_F0r z+9yLN=*7sETAwjhjihGGnc1LmC%4?MWx3Kejts}sa6aZFpE%HtRv?)N;C68%0>MKE z!Y+bI)!6tzGxxVd0$b~)wuCf+N0u=MrtpkQnRg?^3b%FY?~<|sHHSQDcymlX8pVlI z5w;&BDwMz+H5MvC9nS{fWcz+Up|ysqHG-HD_%h-*ByV&G#-R*!I%Ip0Osjic(6lUt zzGTn8@6vCHvNYWhZg`!+l)J3seM6vL5Rf2P10vi~IeBi3&~8p?UEXVJ+p*F$tL4!v zQ|pgAGWej^!`MC0$fF^peescd2D3lnhh1YwY5`!|6hV zswiNRUoGPCbg;798{^z9dA8+=@+H{mQGGi6Rrk2Dgabz#4tJ!%C$O0$Ej|RL0<^}} z*pp~&&Al$cRnp49>!#(M#dF{4?K@Xfcon2$HoS7L%}!LGm= zxMu_FUxzGW1OLRsI9z9do)!Al2vz!illJ3WDxo4L3*Eqnt$+ML_p}tO2PJyOx^6rz zW;eA9Q?KWmLOC6T+X|2rUXit31bH$=sJoM}0=}gBgNz)_N|t6xG;mRE!KJ?cO>NC9 z5xgT-NQlON)6Lw4;Ss2Jnr3>E5CX(S8ch zIjqkIkbfQk%!TF}3rBEG^Wp%2lUWy=(Q?IVN>|JCxJVI}_B2e7zhzb4;{NFib7@Ia3rI5lz1rj* z@crkC`!JA?jsUVg5BPRA*tU~^FdGqtd3YX?m0!2z?aD^u$m`beLcl?Ga7r`&t7bzg zEuetaD+);)ER2ApKkLDJ=8CxmU(S}CSCL++JUABGv8R2sYO`h_GjriWgvrVyE*{{% zAV$%|`wu76x}n(4MS-ysx=&c)|8F@Ia?=#Kt8G6(Ni4 zJvrcGz2Yvy-)VIdc7G0D(lqCLy6SmLi!0>z=Yn^s{^B988%4J!{gTmJn8bv0aX9_M zMTA8eNjp%swbt z&HYj6nQ4!qBXNkCgy5>oSL`Eug+f-8Bv?W8<6rtHEtNu&(x9brI7P{&4d|k{(k5nf zWjX@P4+WmbVMC2fadZZdt@NDoO<>0~$Ermtqb`xeP2$a`S}XXlGrL^(4Tb#sL1Aon 
zq(NUlRfBHv&Wy>t965S`Ro|k`e!qDYfLi{d1qHU&_ise=!T9R6HLP5Pv|b@g6ZwMf z;J9=D2Bco$IHkJ-I^BBGqYF;(>=!-M);#+F47Z;Uw1UNfoZ*wWTHJP$dyx2}zC0s7 zfftXe06lfp9$eh-MmFSzCo=gQC02^*-59^jRENX|Y7;AiS%~FLl{l`kJw>xRV${8&= zvQ&_O`dTX!+u32A10u$|ySq!f)a+yRMML@2w-Rp`i6ux?C4p!{=s{19zvnXstCHxq z?q$}hg?maG9eGGVVB_{T=VkPp6@?m!~g#v#{CDS zjy2*3t{!cp``tibjdf7E(X7{%9PIPt%lZquEV-%GH%1FCEK-qeNt4Dv_beu$9&-ZZ z1c8lv2#`Fk`NV~C>~?d`qVbH$^C}K&YZ(vYu^$;U^4s;9m;Owjb!#giW*ww8_|?ssU~_qT{!0@ ztrd#M&eT9GWJ(O%Sh8L!JB)6RTOmx6#NTgnUmCtjvtvw>egu$zWC$XGBc-Lt9h1dL zC_mIKKLW-Bb-1ZUv?7^1za@aN&M_4EQda`7V&QX%u2NQ?Mt(S_N!;UrAJJS>zx)gn zZ1?t@2}*uQp(FK@Ilo8_ZV2vl^=`z<|5 zWDSGE7GPkMGct0cRc$}J{uXq&KJtxRW}j1*l!3}rb4y~`>~Bs+v0aW)4-=NI=G(hw z_de18O5V4_H;*7`QBLe2iV`VWG<`_$=0}vf4)zvfM@J?Geo`s5ep|oM(EygU4SBw} z4W@8~6XQuXcmNa^+oQP^==bcPuE*$=Z$7}NHF^lWR#VI2nK^=XnQs?IB=m>$k1$C7 zp&@zzPb>n#N}9k&kQPVQC1xl>*qcl%uY1cjY$Td>2pNfGp-#_dZi#nYknV``i=mL{ zzk89y%9FJO!2Oea6tyhNmgHf@fVeG_WTU`T&+pCd()yHNScp?C{%MXbLBr4cEmShd%E+HJ@Lx?qB~r)^=|&QtF}34HJ3* zo=%Dz8vwWK?T=yKlHIGjgKqrH)USb;_H_AKMV`J0<8_622AG-{+gVGF)+eGEaDX(d z7Rsn)Cm;W(u`mF3EB*ew3VZ7iT&Dh++h@DH=j+zM9Ng~Y`jQoE!X=Bd7GMw#0jf{R zS?;lZk5!qm9Y!eitkH*c&b$A)b0^ikp_pNDQPyu4!{Vbns98Zlxm!qdJTR3V&KC@j zCwk?7`n!*Y4Kv5%G|fcBrd5PRXv#**S0}{!B|f#k&MndZbS<~KW82EEe#-KD$t*%( z>zCFIY%5>aT3^1R8~WzHB4ZJkkr(9Js|%@VH6n~_cc`CMe}ApA@DB_Um34g*;+oeM zigvG$vAef(w)U7T6wl}>vuyI?>Ti36?z{UnyaSEsm51yq^Jc)KH!L8j4WhH+wVnj= zWf%V<9@^NfJy@}iVtCrz4c4Jw1j6P4^7HbPHh+L7@e0sFK4@`wX>m8Y=$gZq&m$T! z_l!Y#s4aoozvPEvx9&^Eog51BtV~M<;97&5QTy<6%0n(;nttueGtLA>RR_aBF@=ds zitx|c51=kFeqcGOpLuzc=S)ffyFs5+me199eb2vPN1LNN{haeAt?yoJQJm>lVOxCF zyEI(C*czP~T2cRs`HTF*U_R0|-kumr;;saanym7jzs3V=PP=d2)d-sXJc5zAjZevU zr>*V8)?TK?q%^C}Stts$RnU)s@#gFofAQsC&=w#a^rPoLkctfnj#1TxV_u4e%Bd1P zza85dhI(Gyi;S-yODVn~4sCectnBu!`(nub`Hx-o-u^gnA3! 
z{8MstyK9wURoh3n1mSf_f*?=((NjZ-1oaW(0LKi7ZF}XfJCGE82XhkMcasDq`pg&1 zSq*EeHg)thW=g|}hQeK6haEq>yc`pW=Y1weA7_Bwl z1T7sTB7S6nN$k66x@`L+h>8Zt_y{pa!7sA3%XA!P6>d z;?D?rBZ;~PLmanUmBvbU(iIggmkmYlrRMQn_Ou!^`z+m(vAS-tiL>t)VK{3)*IGWG zS5xIP+A@L~g%7mbiXPg})={8b`f~fVr5!`+Y}2RHo?}hTyq_V5;Kt#TaG)G^PeyxT zN23z=f+xFz%iaA74d6WO*T<(y!p;q8zbN)|x@;YVUi}s4pOT%;m!(lfCy}&zt_BY3NimJ8QEh{6fia@ zs25LBc`2`)=>2W|?^nMIGHr_rHI3@@7wB~9>H0vVFJu!0*I6;rX_AK|y%}81pWI_g zpfTv0@tvTaV0L@FYEu|t!K?gOvBfC5B zZSmzh%@4AdA#Dv1Z;jvIr4`c({#@jt+Wl{UH7^NAwj#{}O*$CPu`GBo9?l<&VBV@# z1+%*SeVHweYIpbApl>x7ZR1lA9sQEKcV*)9F_{~dxh}Qv2O14{pdNQ0@SabdyxckG zUOn7d$IP_y7y(cOPM6KXl7o z=%(L+-1i=QU(e!wS=^#Ux|?BDB)gbyvG0uB;8m%wG$T7=XI5kp67%32dR!%dWkELg zqykGSyLSfu!nyIZ#Rp3l@Nv{pEMM+vDB^Jav9v(en2_$Ytv^|D^?b4s0V3rCpvDE% ztTIYutZvsoPcC^#ao8Q{Fa2?41FiJ|l6sqy7%2M1j2z-+^9fvxYU1z5K{wU*w>Rsf zEIm8f4jmea-E{4pZ%^}Z>(q>N!n2$L=MsY=2KBMf%}NwkPmoIVVE!@^lVkg( zlJaa$M0$HRmkTI2UMC993oLWU;N+~ktKbRM?U|F7(8iPGWBFyZb4CJq2$_`G)%~lB zcWs==R}IhAam~td>8Ta|QUL~(KjGU--4eo#j->dp&Ngep3=1?ewL|94FqamGs7-@dwZe0YU0qM7B{f=syCg1q9jysEkB|4 zD#93!WhPIE`F6+OpT<3nE$hnU8l&?5q+4g2yXYbW*v@l(2G^;8lRTERXK_~IS=(|( zGn6OL)QC4aZ{8j)m2F6*D|Ngy`vQ_@5Z`M6ZjZE(Xsjx+jy{fFxJ{|YnkKWbyt zWt6A!%H-BhK$XR9gx}W|!msxNVFrwze|A3LV!cy`uZ$%5aYf20mV4uUre3satDmpI zTKM<#)k2R^NqHBhR1TN{to^{x!K|#u)>axaUNVkyQ`KEHp(&1L4>&uo8-aqGreW#G z)&m@lV7Lj$u$c)+Oj|l|{Bu*Fq-3v4f30k4S2ZFoDEV)AH2L zT2O9Y0Hks>0_i5t6CgCuCyGbLY&Ldww9M9f!R2ePb5E6N(GHo4Y}>k0jONc(8^%EF z3zpJ9<-=jE^n>yV`BYDcQH`B&?!lKofY8 zgO^}JNDn9%30Oo<2kb8A{CLTz5<{Q*fFkMmFTa0Z@?_*qEFoeZZoL2f9%oW=xS~V9 z)5?1qi#b@C9EP$0^Sp?&_x}R92hkhh&l@XRBn=2`iI8H09>X$e&zU>klQPWW*Ag02 z2R<`3Fll`~)BI=C=Rf8WX(h`A-$}M#i|3*XzquwK&dP3gb>`iPh`acxCuwy0DVGb) z!)8BOA-x5&3kx#O`>b3qOfzB)?s|isYW${m2kC$QdExKk4}c{Cgi2PC;26epjS2ki z1!!bjB2U_|f7L#LX=(gxS^Hvk>H@hXUTnkkYLwy2+6Aq@c=&&xn6Hy z>)M7c&8Y1fC01g)V;y68^mi_L*J=NWEAFP60QviIhVf;43aTg;NY&}A(%6fN6kK9C zqeXa;WrFAa9^m-5jN{G)7J5Y1S)r`0>dN)a#xJwf;0kJt(2vtj9Cz-7h&kJq=x>I@ zoTg}n&XkPiU-11cEm2FArejog7mE^RU}2c-wR*PoY)C3Zmu{-yRnq&>(qJFAe}P&h 
zasF5skNZzxLj?M`2|e6p8>`Qj_RKADb=gE@Rh za6hqd26wqRaj98Gq2_txuKpv%pxtA{V<^tFj`Dz%UxT=OmPOHoWB2lzKR3FsL+0d+ z`a;X}S5V%G41x7>R3N4Gzx|FWmK3E~-~WSyJ~TJdMoT-U#&^W>(kSeXx(G;i7Lgy$9vk|&R<7LhrxJbt z<6bJGFx$cf^Pk53^(%VVp8OWs@_pc#L8!-nvc0y5+wPi{+xP5Sv=3+$|NP{&MWFr$ zTN60#{qLN+TbxLa}i=XRr4^)@!Q6ItQ+#2&x+CvP28J1bgMhZ z`Rr^}Vd@IrytSpFO%kVXNgDD{CpGv6HY;EThLierAl#_6%2kdAI4~G`v2!d|Ogw#a zimDuNp@6pzKm0=1>w`NYpyji|SNz4q!#sR5!3NN)xqxzo1>$DT75Y&5l-Jym*7H3j zkbd~%ISa?_{Cf}C%GP5gEG33I?g$C0`-!QsU?$*{%S>J3z|4KFIU9v@ovFv9%F2w` zH$C@wXUgZPX{CT-0HY5#^ZRyV;JIT6dYk?r6X-p6!Iujz3hW6bCUj9FRk-$g=8|F{{SjiL|ske3Hsf~2G$Gh zbH6Y7GYekOkxKTQKJ+Sxl`lAX0cN62ILn^A2HiZCKz?00{jr8vZ`&Yj(2$#*7fkG~ zPGPS#Qo4|4>?gs!*rFF@<@Vt6nX!{}+Ii$8qy`4KgCU6s&Ipp$I=S>|kf^dPaX7oq z@#drN=|=T-PD{E{3i0t~vy$h}3o-EDfIutnET&l)Mmf-)vH;N%+T&j!E)FL(7fPpMlV<4%HJh5Dd70 zZRB`E$|FaQqt;JRJlDMlga*Q?olUk+CKk=n1}_t4C+%D!&7}*j9zO3rouWJN65=kM zBBh@w%N$P!K)vb5>9=(_|2a0NSVHA;j|5S-1jtichGm1#&%hC?Zerz?iwowmS;h4c z`PY+PB)@bGd$eNT7Fj|Jj(L6FH4>tMT`N&OHd{eloFRa?)=i~tDNt9xeS}`^>JkdJ zvOW7;NlD78e%B-9!-8i0vunF)i@727q3{A|%p zNP`FAL6_;|+4?8S>D=Y6iZYEgVR?X2bYQn)Ys4`6U>TGUJh&6r*)22Jh{?UFJ#{vh zN3qh6#?Fp}$yvL0%Mol<>F>Pd#(#%A6F>U|kX~2ULgG3EDfbuf&Pm<&q0hNFFUTj& z|8##fZ7O3SZz6wX{hPH=m;Wz%;_6?B#EX2#&VSN<-3K<-{fgh(SXGqkpLBGH5DJXc zhDRmPWT~ptPei40rSWE%(q;CWhOH*Hh{e+R;4~*(g5XeBMF+T}fB^Z4@^spa`Kz z3Bj4i=WYT8X#bm_vf-lWFF>NR1!}>s?OuosJ@nNI3rdK}46IKclp)+7Z4Q)!0e<0} zBusUYo(!;IaT$TiJFiLNqK(kkdX&Lgzc~qQK!aSX-~4&5R@WH4_vF&o(1BiIr_%Up zeC}0bPhp=*{to<2*2_#`rC!Dw1=Uo}aVQr}5D3XL?;b&|UwB{OG0{j+@>7V{Im28M z-(My8{Vak{RE^1pjv9HAPdf{YoSuQWEDHdxxUmFbzvNQWoW8oyum<_CqG`)e+PG`} zk&SmUP47dwK9F*cPW6Di2PzZjK>G1*>M3G#-?nd#>8F{ZT=_+JnE}4#N(|$on(^1+ zym4-$`*g22JFF-!e#cf+*^|6IpdNLyT2Gmar&<-+E`QGzh*!WgkP4mbZwo0_``g1_ zRzKkXz6Nr>c|E>=DI5PoeE#OzZ9s;hYrjN{+84OnKCzx*viO2gra8y*Q}2cv@5`B` zV)w|~m%n+c)BiH%{X6y*61@GiAOYnLU7W+krO>29Lu@_)sIR9hv*AE^$`;(Fn=+JKDkNrJ>p)Cit=iW6vvX z5lMrqXWtqNyrgrtqjnpd?%Tfb53}&F zvuBIqQ4h%WD}Thm6?xbRxj~z9mOiQbZT`%epSab9j=QkcCnnv{gyI?VuKYTmqM#<( 
zR$rID=^l8cHgs~s`WfKqbu7Qj@esud^k0pUOHVm_36eJ{B5R6OggA2a+WR8h^-jxh zTiXcAZd!R(*Wtr6An`o$Cq74UQ8a!E)Pdw&Hk&UtzKfjt;G-pSUob{hZ48vBv$Xfv z_UCYfF=li%Zbka!Q`K&}1`(}GIbB4RB_>-g+Kp7`R(l^MO#ULSEt#;_jgBa6X!$B& zUU6)(ehA9BS!}%O3In~IS05n(8$-m~+Z#dxZ*JxaKQhPFgBO6Vhvn&6o<+^9O ztg;mOFfeuy-SGpM?h#WysRKpe4>3qk=qbSQ{Onhr5Gc>V&tS^Bz^dcTv}D+W4D-u! z(l)Zeg%5)c<1OWP2mb-|_3yAgJXp%7obWWR@DOE7qT7-gr1?kFEkWT<$Y28kq~_^ z%qWK9EKan59%)ux1g4}G#Ysgpz?jQxLrbYJ29%ZMl=%(Rd@z z`HNTL&qIrw-q#L8=*-#j8Mp=MINRkggJV&^v;&%8eSDKDJ=38I3=0~r|2Tmdx3<)4 zs=*Gpe0WCucmvhCW5))sXIrziTFZP3=tt>vupn;b;I_K{q&tJSn?Lg85MfTc`2Bb#2tp`*b2+LHnNf__KcrWe`-_{cR#B4kIRSpr^}In>5%u2RGAHoFomZ)n~v z+-{Gd!E?8^7xZS$0g(xlm{tFn*oF>2Gey@MZ`4{H>{AO zQu+PIR<9~aWeLrdJC_)v2BVI@yeJ>lU5n_^IsI3~%tu^}NBMpND=%twLF?k4J^B9o z>hF8gvCZYR5!pHSv$>rIVD#eBgN5ftIf7YuAI%Rey#QuPJa-1HosviU*Os!+*+t&WVw^8j#488o^K2xH4zZ*`pOs9&J zqQf?gJk#g%)!tp53ehu{ILBGzg-HhsBLXQS)yQ#ebSFHagMNw%J_1Lbd^~D;^TDLd z-dX1065O{&A$xXA%%~tVjr%O(kuJpPAGEc9|0rSmeJCLq5WOH0iteWg6PM?bHB9*j z9rFC7hSLMWW!$9X28&ZNX@l3@_A1UZln@Vkb1}*7EbIIHuw2oB*dS5Y?uETAleTLg zYjZiDwW{5I{UQIF^))(%nt#?O{)^`ff8%ld=dGV=m{VPSEpfD=>8_#wh3Fj{ zt}>lTqhzN=!6DUenO|;v;1ENy7a!fnWFc-DQ2`R4C)a8%P4}B3i9VXN9 zu{ptwJ+k-8)lqAaTsrC^aQm$Wq&dCcStV=ih##4FT7y-5pj=e0_f*emfS7C2O3ZI7 zHH-)%;(J!O+8+D;ssalDp&M^B2c`B!nz{mHKYOM5Z>qDx8i7N(Qxz!N$2X|TGstsP zcS?BJ+b;nXjyp9z-l&r0`Ino~E0Szgjo&gW^WMq0N<9mD5DLBmbj#KT2zl!)5HDz~ z4V0sS5WI!!o2`&RUc=5lc+Ot3(VliwD2gBU>eQu^;adjj_2>Gm7$XCqO-#L5KB^0F z+J^`L@{iL>gmb~{SCS97eYpk{r%gh*(z8@}uV2_?r4zbJ_rRtQIfDiWxBat7v6J@- zG!-AnIrXoJ051|K~B_h;q z&FLW0uR)w20hv`)^7Gx(2=~mq7d^jcH*a@^k?COY9B3eT?xNRX+s?bOi@}DY_U_XX5qJlc;eqk)|HngSQ#LCZIT;Y?4NRu_2Kg)mh?HiFdQ| zNmaMK-s}O`-B;Pb=qpxwK6`nGf;(Bm0bpTCdgSwNX)B1zopoXv0|lKE*m$EM=XOeMeIhtW7%U*eafvVYSE}m_#Ru58(T;EBRlK8j5mk_5Z*GRPubClY35l-GW=kF+t>>@qWlr+QI zuHjMHK#%arU8arLW`V9%fbrOTaTj0$oAhZ%?4b*{o#p&)-pin|*oQB+$(QEt{Csqb zIDYl!O%hkOFNcc!ugrcdCryshgJEPBQWvzM)vsr+6eDMtRT)(XM}vlibF);-LPIO?*IqGx_OD@L986T&Z%o5 z5v$AipuZxX`QB8lEN!=1uM~Qej#W51-1=$g={sI|aQ1CF??ar?JC3dAYDP(!f2f=v 
zXZmBE7=tfJ!Fm=faFRzTIqT54pOl{ix93U@$S{-gbNek9Tf92Jdjcu69x#rSW#sf?G~lBesmX{4}x=q4y}##y@5f;@smn~cI&q=`^=$o z2j|iBll<=nf6{^ORLknQz$G^B*j^6IIpzaaO{U)E5CA{xhsO%T?5hf%f!qS$ZHM>; z)g8@F&W25ZTGCDl#?J)|fWAFQw|)YdAF!JHE8vDUj^d;Vtsul}C;Wmgk{p&zOW*zA zS?<~BVbp95ni=*T+#{$Db3r#A*i8%fQM`!4=e zko33{)h>TNi|?2D?f@`L_nLC)SPXFoU}#|CPz$ZA@f$OMp6gOGqri{5KjViS72EK? zkjU&}ptF%3k^_7G^Kq%F*J^#-0Qwv-tEtk6o`*UR9ckjEl`Q{v3-U?J+OP}lr{tQuO@-z-jD1} z&lJjolC#GvczAS$UUrb;7CwbRqM`Lp`*B#`6muXG7{L%o%;}sH6Au@?Y1)!IVpjfP zL=!Kg8GL1_@2u*(Ld8#7M&Mgro)Y4Av}vp`c0C_VM;XF127d_;d~2^f?~<05;j}cE zJ-bfSy|g)4(kVe;VhqBhgY?(X=#=9NJ;%BLZGH0-5b~iX&RLr14%K9uC>=gUHaR_& zjPm4S{*tOi2iAG$%ECv66GVwnGaxvFnbN$rR~kr`A&Hiy%BPe(6HqbATpYZ2&d$_zH}DH`!C5t1uG1 z%w`bc&P1oWK>x?;!2Uu#9+HZ6heV4v*z`d79Ye2MkkIkIzu0Z=G$kt689%NHt{d9) zJ+>^&o_c27sR|a^-27xda1=KHpU04!DFWVQRNGll$UCZi+ovgw6o^IYioD!rydJ~+ zVs~QGcj0wAI@$h_%00ovo|9sfdqJYTz-|{2O7^30tsP^8v5^`>ior)zXYX15eHpeV zbC*vqPg}xs!{0xl7qY&poF4NX3gqg5Ayjk9)-jlBNYv^!wg1D2;$3E3;WMOJQrHDn z7M^Zwms?1k%ax64Ms#bs7P?C-&_OKZe-@CBD59kB9@1*6%-Gc83vXirM~jrb_pzA+0O7jaQTuU+n(*v5N?AG$&viI zY_>dT797cGN6|gLMZJFV7Uk(=Mb0Kp$91=+RA&XiE1e9_^Ei8FczeyMb#=IVeKYEL z<+k$il7#CAV}=bhGi_F!mv$c2rKJQU_$DJ|g>%q|`*0gQ@3p4Lq`X&R3w=`2a`=R7 z^@Qp{f=s{Q>**tb-6T@Mx-yjqi-UAS*%lUkusrP%iv2|)ZXTEQ{X~4Y9eM&_j2XOo z+=H47X>1$!(xVZ1_&Feb;TF}5LQgEvL!AmMUuPQ;xRcZ!eDtO~d(5D|=~7rwV32e% zXIRK(gW`UN&&hPfuP&<7FCzizTMXj08iTDvb+Jc)C6>0aTrET23$9{$dg?j=)C}H{iFw(UevCUF=-? zR&=0Z_4q+1W1?XwIyX+twa70kl4YMF1uFFYo8}->bnr9f)>HnzOC%7Fu_&{nn@<{EW z6~_z~b=mUF8mcaS8i3*dx6tp4NkfTR=)u^Pg=s%;ercIMipmz^o#ErF$q;b>=!~+b z1RiTqK@Rc-$+R1_#{1Rp&DDnV8e$>8Mhv`o(O0(Y_*bL`tA9i(YO=N;_jCiW2YVgV*!J@nJ(sTMQ7^r4jp4SeyCE{va#;QaDh}ER?cQI! 
zd6%+_V26_|#rZKP zi^rkr$>}5$Ym!!6Y)l{y&SO~S;N9(7=F$B8R(4a`VSouF>$q?*^`=W?wF$1de}6%E zm3&llQUK-(t#-QIz27qup_sCqm1nyhl5;`-%F>s!s!JgLrj3h63ywotIb4rqKDQ8= z)=&ifb;u$F=h+7lx3ycgqvR{9T_ByWE4oRR!`u1$s{go!y2@~Q;4EBoJiXVcH$w&B zw0U5o3IG!+`cFDhtQR~fS7ovhBeL_u<(;%SJV0XH_nrc`e{+UT{?kgy^!NuZLKgix z;3p@AApK)EpsKea{^*>dy3-aYg(Qal82!e!#>qBURHE(Tr(}al^4b; z+|hbc`~<#mdNa)>mc&w=dah=WTUJrh zG!mnrhV|l1F5m)*km`E?j*DQ2AWQ&ziYl*edZEQ@e1v{x%G=5QsQ*l^g@@N;S3Awq znFpOl+pJ|whU<`|0zM!eV;<9s6@$Q5MVvxgH^cZ%#Z7ZeYA5zGv=o|x$Jlcb%{F$N z+HIdDEYNG|d3^-17IG4}Z^HD5UhD>6fvv`ClN&wCLvy;1ZZsAjF+Fcr>dJ>jm~9k> z(H1`6rwE+N=9H8a6MKhtsUna^^b3O$Mn`C6i8WBpQC@T2}9$Hnj7INc8)B< zLYV?|qI<8IEY": temp_prefix = [temp_role+temp_text for temp_role, temp_text in prefix] - temp_prefix.append(role) # need to be concated with [EOS] in practice + temp_prefix.append(role) temp_reply = text # last part of dialog, the text chosen_sample = { 'extended':[ @@ -148,7 +146,7 @@ def _split_dialogue(text: str): prefix.append([role, text]) elif role == "<|assistant|>": temp_prefix = [temp_role+temp_text for temp_role, temp_text in prefix] - temp_prefix.append(role) # need to be concated with [EOS] in practice + temp_prefix.append(role) temp_reply = text # last part of dialog, the text rejected_sample = { 'extended':[ @@ -184,56 +182,13 @@ def _split_dialogue(text: str): return samples if __name__ == "__main__": - # get a global index generator global_index_generator = gen_global_index() - - # prepare to post-processing - res = { - 'hhrlhf':[], - } - - prompts = { - 'hhrlhf': '', - 'summarize':'', - 'webgpt':'', - 'tldr':'', - } - # process raw datasets - # hhrlhf - res['hhrlhf'] = [ - hhrlhf_preprocess(os.path.join('..','..','data','raw_data','hhrlhf','harmless-base'),'train.jsonl',global_index_generator,split='train'), - # hhrlhf_preprocess(os.path.join('..','..','data','raw_data','hhrlhf','harmless-base'),'test.jsonl',global_index_generator,split='test'), - 
hhrlhf_preprocess(os.path.join('..','..','data','raw_data','hhrlhf','helpful-base'),'train.jsonl',global_index_generator,split='train'), - # hhrlhf_preprocess(os.path.join('..','..','data','raw_data','hhrlhf','helpful-base'),'test.jsonl',global_index_generator,split='test'), - hhrlhf_preprocess(os.path.join('..','..','data','raw_data','hhrlhf','helpful-online'),'train.jsonl',global_index_generator,split='train'), - # hhrlhf_preprocess(os.path.join('..','..','data','raw_data','hhrlhf','helpful-online'),'test.jsonl',global_index_generator,split='test'), - hhrlhf_preprocess(os.path.join('..','..','data','raw_data','hhrlhf','helpful-rejection'),'train.jsonl',global_index_generator,split='train'), - # hhrlhf_preprocess(os.path.join('..','..','data','raw_data','hhrlhf','helpful-rejection'),'test.jsonl',global_index_generator,split='test'), - ] + + hhrlhf_preprocess(os.path.join('..','..','data','raw_data','hhrlhf','harmless-base'),'train.jsonl',global_index_generator,split='train'), + hhrlhf_preprocess(os.path.join('..','..','data','raw_data','hhrlhf','helpful-base'),'train.jsonl',global_index_generator,split='train'), + hhrlhf_preprocess(os.path.join('..','..','data','raw_data','hhrlhf','helpful-online'),'train.jsonl',global_index_generator,split='train'), + hhrlhf_preprocess(os.path.join('..','..','data','raw_data','hhrlhf','helpful-rejection'),'train.jsonl',global_index_generator,split='train'), hhrlhf_preprocess(os.path.join('..','..','data','raw_data','hhrlhf','harmless-base'),'test.jsonl',global_index_generator,split='dev') hhrlhf_preprocess(os.path.join('..','..','data','raw_data','hhrlhf','helpful-base'),'test.jsonl',global_index_generator,split='dev') hhrlhf_preprocess(os.path.join('..','..','data','raw_data','hhrlhf','helpful-online'),'test.jsonl',global_index_generator,split='dev') - hhrlhf_preprocess(os.path.join('..','..','data','raw_data','hhrlhf','helpful-rejection'),'test.jsonl',global_index_generator,split='dev') - - global_prefixes = [] - 
global_extended_samples = 0 - for key in res: - for dataset in res[key]: - for sample in dataset: - for sub_sample in sample['extended']: - prefix = "".join(sub_sample['prefix']) - prefix = prefix.replace("<|prompter|>", "\n\nHuman: ").replace("<|assistant|>", "\n\nAssistant: ") - prefix = prompts[key].replace('', prefix) - global_prefixes.append( - { - 'id': sub_sample['id'], - 'prefix': prefix, - 'target_num': sub_sample['target_num'], - 'target': [] - } - ) - global_extended_samples += sub_sample['target_num'] - - - print('Total Num: {}'.format(len(global_prefixes))) - print('Total Extended Num: {}'.format(global_extended_samples)) \ No newline at end of file + hhrlhf_preprocess(os.path.join('..','..','data','raw_data','hhrlhf','helpful-rejection'),'test.jsonl',global_index_generator,split='dev') \ No newline at end of file diff --git a/PRO/train/preprocess_data/step_2_gen_train_data.py b/PRO/train/hh_preprocess_data/step_2_gen_train_data.py similarity index 93% rename from PRO/train/preprocess_data/step_2_gen_train_data.py rename to PRO/train/hh_preprocess_data/step_2_gen_train_data.py index a64605f2..bbeb4be2 100644 --- a/PRO/train/preprocess_data/step_2_gen_train_data.py +++ b/PRO/train/hh_preprocess_data/step_2_gen_train_data.py @@ -5,9 +5,8 @@ import random import numpy as np import tqdm -from utils.metrics import create_reward_fn_3 -get_score, reward_batch_size = create_reward_fn_3() -# get_score = None +from utils.metrics_hh import create_reward_fn +get_score, reward_batch_size = create_reward_fn() def split_trans(split): if split == 'train' or split == 'test' or split == 'dev': @@ -48,7 +47,6 @@ def reward_model_ranker(prefixes, suffixes): def extract_train_data(root_dir, if_score, if_rerank, training_stage_num = None, split='train'): file_list = [] - # for root,dirs,files in os.walk('refilled_data'): for root,dirs,files in os.walk(root_dir): for file in files: if not file.endswith("json"): @@ -103,16 +101,19 @@ def extract_train_data(root_dir, if_score, 
if_rerank, training_stage_num = None, for l in training_data: l['reward'] = [1.0] * len(l['suffix']) + for l in training_data: + l['sft_index'] = 0 + return training_data if __name__ == '__main__': root_dir = os.path.join('..','..','data',"preprocessed_data") data_aug = False - os.makedirs(os.path.join('..','..','data','train_len2'), exist_ok=True) + os.makedirs(os.path.join('..','..','data','hh_train_len2'), exist_ok=True) random.seed(42) training_data = extract_train_data(root_dir = os.path.join(root_dir, "hhrlhf"), if_score = True, if_rerank=True, split = 'train') random.shuffle(training_data) - with open(os.path.join('..','..','data','train_len2','train.json'),'w', encoding='utf-8') as f: + with open(os.path.join('..','..','data','hh_train_len2','train.json'),'w', encoding='utf-8') as f: for sample in training_data: f.write(json.dumps(sample,ensure_ascii=False)+'\n') @@ -124,7 +125,6 @@ def extract_train_data(root_dir, if_score, if_rerank, training_stage_num = None, random.seed(42) helpful_base_dev_data = extract_train_data(root_dir = os.path.join(root_dir, "hhrlhf", "helpful-base"), if_score = True, if_rerank=False, split = 'dev') random.shuffle(helpful_base_dev_data) - random.seed(42) helpful_online_dev_data = extract_train_data(root_dir = os.path.join(root_dir, "hhrlhf", "helpful-online"), if_score = True, if_rerank=False, split = 'dev') @@ -137,6 +137,6 @@ def extract_train_data(root_dir, if_score, if_rerank, training_stage_num = None, total_dev_data = harmless_base_dev_data + helpful_base_dev_data + helpful_online_dev_data + helpful_rejection_dev_data random.shuffle(total_dev_data) total_dev_data = total_dev_data[:280] - with open(os.path.join('..','..','data','dev','sampled_dev.json'),'w', encoding='utf-8') as f: + with open(os.path.join('..','..','data','hh_dev','sampled_dev.json'),'w', encoding='utf-8') as f: for sample in total_dev_data: f.write(json.dumps(sample,ensure_ascii=False)+'\n') \ No newline at end of file diff --git 
a/PRO/train/preprocess_data/step_3_gen_test_data.py b/PRO/train/hh_preprocess_data/step_3_gen_test_data.py similarity index 91% rename from PRO/train/preprocess_data/step_3_gen_test_data.py rename to PRO/train/hh_preprocess_data/step_3_gen_test_data.py index 0aea94db..4945a884 100644 --- a/PRO/train/preprocess_data/step_3_gen_test_data.py +++ b/PRO/train/hh_preprocess_data/step_3_gen_test_data.py @@ -47,7 +47,6 @@ def reward_model_ranker(prefixes, suffixes): def extract_train_data(root_dir, if_score, if_rerank, training_stage_num = None, split='train'): file_list = [] - # for root,dirs,files in os.walk('refilled_data'): for root,dirs,files in os.walk(root_dir): for file in files: if not file.endswith("json"): @@ -103,37 +102,40 @@ def extract_train_data(root_dir, if_score, if_rerank, training_stage_num = None, for l in training_data: l['reward'] = [1.0] * len(l['suffix']) + for l in training_data: + l['sft_index'] = 0 + return training_data if __name__ == '__main__': root_dir = os.path.join('..','..','data',"preprocessed_data") data_aug = False - os.makedirs(os.path.join('..','..','data','test'), exist_ok=True) + os.makedirs(os.path.join('..','..','data','hh_test'), exist_ok=True) random.seed(42) harmless_base_dev_data = extract_train_data(root_dir = os.path.join(root_dir, "hhrlhf", "harmless-base"), if_score = True, if_rerank=True, split = 'dev') random.shuffle(harmless_base_dev_data) - with open(os.path.join('..','..','data','test','harmless_base.json'),'w', encoding='utf-8') as f: + with open(os.path.join('..','..','data','hh_test','harmless_base.json'),'w', encoding='utf-8') as f: for sample in harmless_base_dev_data: f.write(json.dumps(sample,ensure_ascii=False)+'\n') random.seed(42) helpful_base_dev_data = extract_train_data(root_dir = os.path.join(root_dir, "hhrlhf", "helpful-base"), if_score = True, if_rerank=False, split = 'dev') random.shuffle(helpful_base_dev_data) - with open(os.path.join('..','..','data','test','helpful_base.json'),'w', 
encoding='utf-8') as f: + with open(os.path.join('..','..','data','hh_test','helpful_base.json'),'w', encoding='utf-8') as f: for sample in helpful_base_dev_data: f.write(json.dumps(sample,ensure_ascii=False)+'\n') random.seed(42) helpful_online_dev_data = extract_train_data(root_dir = os.path.join(root_dir, "hhrlhf", "helpful-online"), if_score = True, if_rerank=False, split = 'dev') random.shuffle(helpful_online_dev_data) - with open(os.path.join('..','..','data','test','helpful_online.json'),'w', encoding='utf-8') as f: + with open(os.path.join('..','..','data','hh_test','helpful_online.json'),'w', encoding='utf-8') as f: for sample in helpful_online_dev_data: f.write(json.dumps(sample,ensure_ascii=False)+'\n') random.seed(42) helpful_rejection_dev_data = extract_train_data(root_dir = os.path.join(root_dir, "hhrlhf", "helpful-rejection"), if_score = True, if_rerank=False, split = 'dev') random.shuffle(helpful_rejection_dev_data) - with open(os.path.join('..','..','data','test','helpful_rejection.json'),'w', encoding='utf-8') as f: + with open(os.path.join('..','..','data','hh_test','helpful_rejection.json'),'w', encoding='utf-8') as f: for sample in helpful_rejection_dev_data: f.write(json.dumps(sample,ensure_ascii=False)+'\n') \ No newline at end of file diff --git a/PRO/train/summarize_preprocess_data/step_1_process.py b/PRO/train/summarize_preprocess_data/step_1_process.py new file mode 100644 index 00000000..69200fd0 --- /dev/null +++ b/PRO/train/summarize_preprocess_data/step_1_process.py @@ -0,0 +1,109 @@ +import json +import re +import pprint +import os +import tqdm +import random +random.seed(42) + +def gen_global_index(): + index = 0 + while True: + yield index + index += 1 + +def split_trans(split): + if split == 'train' or split == 'test' or split == 'dev': + return split + elif split == 'valid': + return 'dev' + elif split == 'valid1': + return 'dev' + elif split == 'valid2': + return 'test' + else: + raise Exception('guaiguaidigai') + +def 
summarize_from_feedback_preprocess(path,index_generator): + files = os.listdir(path) + files = [filename for filename in files if filename.endswith('.json')] + target_samples = { + 'train':[], + 'dev':[], + 'test':[] + } + + for filename in files: + with open(os.path.join(path,filename),'r', encoding="utf-8") as f: + raw = f.readlines() + + data = [] + for line in raw: + line = json.loads(line) + data.append(line) + + samples = [] + bar = tqdm.tqdm(data) + for index,sample in enumerate(bar): + bar.set_description(os.path.join(path,filename)) + assert len(sample['summaries']) == 2 + if 'post' in sample['info']: + prefix = "SUBREDDIT: r/{}\nTITLE: {}\nPOST: {}\nTL;DR:".format(sample['info']['subreddit'], sample['info']['title'],sample['info']['post']).strip() + one_sample = { + 'available': [ + { + 'id':next(index_generator), + 'prefix': prefix, + 'target_num':2, + 'target':[ + " {}".format(sample['summaries'][sample['choice']]['text'].strip()), + " {}".format(sample['summaries'][1-sample['choice']]['text'].strip()), + ] + }, + ], + 'split': split_trans(sample['split']), + 'source': { + 'path': os.path.join(path,filename), + 'line_num': index+1, + } + } + target_samples[one_sample['split']].append(one_sample) + else: + prefix = "Article: {}\nTL;DR:".format(sample['info']['article']) + pass + + os.makedirs(path.replace('raw_data','preprocessed_data'), exist_ok=True) + + true_dev_index = random.sample(list(range(len(target_samples['dev']))),1000) + true_dev = [] + for index, sample in enumerate(target_samples['dev']): + if index in true_dev_index: + sample['split'] = 'dev' + true_dev.append(sample) + else: + sample['split'] = 'train' + target_samples['train'].append(sample) + target_samples['dev'] = true_dev + + with open(os.path.join(path.replace('raw_data','preprocessed_data'), "train.json"), 'w', encoding='utf-8') as f: + for sample in target_samples['train']: + f.write(json.dumps(sample,ensure_ascii=False)+'\n') + print("{}: 
{}".format(os.path.join(path.replace('raw_data','preprocessed_data'),"train.json"),len(target_samples['train']))) + + with open(os.path.join(path.replace('raw_data','preprocessed_data'), "dev.json"), 'w', encoding='utf-8') as f: + for sample in target_samples['dev']: + f.write(json.dumps(sample,ensure_ascii=False)+'\n') + print("{}: {}".format(os.path.join(path.replace('raw_data','preprocessed_data'),"dev.json"),len(target_samples['dev']))) + + with open(os.path.join(path.replace('raw_data','preprocessed_data'), "test.json"), 'w', encoding='utf-8') as f: + for sample in target_samples['test']: + f.write(json.dumps(sample,ensure_ascii=False)+'\n') + print("{}: {}".format(os.path.join(path.replace('raw_data','preprocessed_data'),"test.json"),len(target_samples['test']))) + +if __name__ == "__main__": + global_index_generator = gen_global_index() + + summarize_from_feedback_preprocess( + os.path.join('..','..','data','raw_data','summarize_from_feedback','comparisons'), + global_index_generator + ) \ No newline at end of file diff --git a/PRO/train/summarize_preprocess_data/step_2_gen_train_data.py b/PRO/train/summarize_preprocess_data/step_2_gen_train_data.py new file mode 100644 index 00000000..e1fc16e4 --- /dev/null +++ b/PRO/train/summarize_preprocess_data/step_2_gen_train_data.py @@ -0,0 +1,118 @@ +import os +import sys +sys.path.append("..") +import json +import random +import numpy as np +import tqdm +from utils.metrics_summarize import create_reward_fn +get_score, reward_batch_size = create_reward_fn() + +def split_trans(split): + if split == 'train' or split == 'test' or split == 'dev': + return split + elif split == 'valid': + return 'dev' + elif split == 'valid1': + return 'dev' + elif split == 'valid2': + return 'test' + else: + raise Exception('guaiguaidigai') + +def concat_wo_ranker(prefixes, suffixes): + #prefixes = [[a,b,c],[d,e,f]] + #suffixes = [[a,b,c],[d,e,f]] + training_stage_num = len(prefixes[0]) + batch_size = len(prefixes) + new_prefixes = 
sum(prefixes,[]) + new_suffixes = sum(suffixes,[]) + rewards = get_score(new_prefixes, new_suffixes).view(batch_size, training_stage_num).cpu().detach().numpy().tolist() #[batch_size, ranking] + + return prefixes, suffixes, rewards + +def reward_model_ranker(prefixes, suffixes): + #prefixes = [[a,b,c],[d,e,f]] + #suffixes = [[a,b,c],[d,e,f]] + training_stage_num = len(prefixes[0]) + batch_size = len(prefixes) + new_prefixes = sum(prefixes,[]) + new_suffixes = sum(suffixes,[]) + rewards = get_score(new_prefixes, new_suffixes).view(batch_size, training_stage_num).cpu().detach().numpy() #[batch_size, ranking] + indices = np.argsort(-rewards,axis=1) + prefixes = [[prefixes[i][index] for index in indices[i]] for i in range(batch_size)] + suffixes = [[suffixes[i][index] for index in indices[i]] for i in range(batch_size)] + rewards = [[float(rewards[i][index]) for index in indices[i]] for i in range(batch_size)] + return prefixes, suffixes, rewards + +def extract_train_data(root_dir, if_score, if_rerank, training_stage_num = None, split='train'): + training_data = [] + with open(root_dir, 'r', encoding='utf-8') as f: + raw_data = f.readlines() + for line in raw_data: + sample = json.loads(line) + if split_trans(sample['split']) == split: + new_sample = {'meta': sample['source'], 'prefix':[],'suffix':[]} + if data_aug: + for s in sample['extended']+sample['available']: + for suffix in s['target']: + assert isinstance(suffix,str) + new_sample['prefix'].append(s['prefix']) + new_sample['suffix'].append(suffix) + else: + for s in sample['available']: + for suffix in s['target']: + assert isinstance(suffix,str) + new_sample['prefix'].append(s['prefix']) + new_sample['suffix'].append(suffix) + training_data.append(new_sample) + if training_stage_num == None: + training_stage_num = len(new_sample['prefix']) + assert training_stage_num == len(new_sample['prefix']) + + if if_score: + batch_size = reward_batch_size / 2 # default + for index in 
tqdm.tqdm(range(0,len(training_data),batch_size),desc="rewarding"): + prefixes = [] + suffixes = [] + if len(training_data)-index < batch_size: + batch_size = len(training_data)-index + for sub_index in range(batch_size): + prefixes.append(training_data[index+sub_index]['prefix']) + suffixes.append(training_data[index+sub_index]['suffix']) + if if_rerank: + prefixes, suffixes, rewards = reward_model_ranker(prefixes,suffixes) + else: + prefixes, suffixes, rewards = concat_wo_ranker(prefixes,suffixes) + for sub_index in range(batch_size): + training_data[index+sub_index]['prefix'] = prefixes[sub_index] + training_data[index+sub_index]['suffix'] = suffixes[sub_index] + training_data[index+sub_index]['reward'] = rewards[sub_index] + else: + for l in training_data: + l['reward'] = [1.0] * len(l['suffix']) + + for l in training_data: + l['sft_index'] = 0 + + return training_data + +if __name__ == '__main__': + root_dir = os.path.join('..','..','data',"preprocessed_data", "summarize_from_feedback", "comparisons") + data_aug = False + os.makedirs(os.path.join('..','..','data','summarize_train_len2'), exist_ok=True) + random.seed(42) + training_data = extract_train_data(root_dir = os.path.join(root_dir, "train.json"), if_score = True, if_rerank=True, split = 'train') + random.shuffle(training_data) + with open(os.path.join('..','..','data','summarize_train_len2','train.json'),'a', encoding='utf-8') as f: + for sample in training_data: + f.write(json.dumps(sample,ensure_ascii=False)+'\n') + + data_aug = False + os.makedirs(os.path.join('..','..','data','summarize_dev'), exist_ok=True) + random.seed(42) + total_dev_data = extract_train_data(root_dir = os.path.join(root_dir, "dev.json"), if_score = True, if_rerank=False, split = 'dev') + random.shuffle(total_dev_data) + with open(os.path.join('..','..','data','summarize_dev','sampled_dev.json'),'a', encoding='utf-8') as f: + for sample in total_dev_data: + f.write(json.dumps(sample,ensure_ascii=False)+'\n') \ No newline at end 
import os
import sys
sys.path.append("..")
import json
import random
import numpy as np
import tqdm
# FIX: utils/metrics.py no longer exists — this same change set renames it to
# metrics_hh.py and adds metrics_summarize.py. The summarize preprocessing
# scripts must use the summarize reward model.
from utils.metrics_summarize import create_reward_fn_2

get_score, reward_batch_size = create_reward_fn_2()


def split_trans(split):
    """Normalize the dataset's split label to one of 'train'/'dev'/'test'.

    Raises:
        Exception: for any unrecognized split label.
    """
    if split in ('train', 'test', 'dev'):
        return split
    if split in ('valid', 'valid1'):
        return 'dev'
    if split == 'valid2':
        return 'test'
    raise Exception('guaiguaidigai')


def concat_wo_ranker(prefixes, suffixes):
    """Score each (prefix, suffix) candidate without re-ranking.

    prefixes/suffixes: [batch, training_stage] nested lists of strings.
    Returns the inputs unchanged plus rewards of the same shape.
    """
    training_stage_num = len(prefixes[0])
    batch_size = len(prefixes)
    flat_prefixes = sum(prefixes, [])
    flat_suffixes = sum(suffixes, [])
    # [batch_size, ranking]
    rewards = get_score(flat_prefixes, flat_suffixes).view(batch_size, training_stage_num).cpu().detach().numpy().tolist()
    return prefixes, suffixes, rewards


def reward_model_ranker(prefixes, suffixes):
    """Score candidates and sort each sample's candidates by descending reward.

    prefixes/suffixes: [batch, training_stage] nested lists of strings.
    Returns (prefixes, suffixes, rewards) with each row re-ordered so the
    highest-reward candidate comes first.
    """
    training_stage_num = len(prefixes[0])
    batch_size = len(prefixes)
    flat_prefixes = sum(prefixes, [])
    flat_suffixes = sum(suffixes, [])
    # [batch_size, ranking]
    rewards = get_score(flat_prefixes, flat_suffixes).view(batch_size, training_stage_num).cpu().detach().numpy()
    indices = np.argsort(-rewards, axis=1)
    prefixes = [[prefixes[i][index] for index in indices[i]] for i in range(batch_size)]
    suffixes = [[suffixes[i][index] for index in indices[i]] for i in range(batch_size)]
    rewards = [[float(rewards[i][index]) for index in indices[i]] for i in range(batch_size)]
    return prefixes, suffixes, rewards


def extract_train_data(root_dir, if_score, if_rerank, training_stage_num=None, split='train'):
    """Load JSON-lines comparison data for `split` and optionally attach rewards.

    Args:
        root_dir: path to the JSON-lines input file.
        if_score: when True, run the reward model over every sample.
        if_rerank: when True, also sort candidates by reward (reward_model_ranker).
        training_stage_num: expected number of candidates per sample; inferred
            from the first sample when None.
        split: which normalized split to keep ('train'/'dev'/'test').

    NOTE(review): reads the module-level global `data_aug` (set in __main__);
    it controls whether 'extended' candidates are included.
    """
    training_data = []
    with open(root_dir, 'r', encoding='utf-8') as f:
        raw_data = f.readlines()
    for line in raw_data:
        sample = json.loads(line)
        if split_trans(sample['split']) != split:
            continue
        new_sample = {'meta': sample['source'], 'prefix': [], 'suffix': []}
        sources = (sample['extended'] + sample['available']) if data_aug else sample['available']
        for s in sources:
            for suffix in s['target']:
                assert isinstance(suffix, str)
                new_sample['prefix'].append(s['prefix'])
                new_sample['suffix'].append(suffix)
        training_data.append(new_sample)
        if training_stage_num is None:
            training_stage_num = len(new_sample['prefix'])
        assert training_stage_num == len(new_sample['prefix'])

    if if_score:
        # FIX: '/' produced a float in Python 3, which raises TypeError when
        # used as a range() step and as a slice bound. Integer division is
        # required here.
        batch_size = reward_batch_size // 2  # default
        for index in tqdm.tqdm(range(0, len(training_data), batch_size), desc="rewarding"):
            # The final chunk may be smaller than batch_size.
            cur_bs = min(batch_size, len(training_data) - index)
            prefixes = [training_data[index + i]['prefix'] for i in range(cur_bs)]
            suffixes = [training_data[index + i]['suffix'] for i in range(cur_bs)]
            if if_rerank:
                prefixes, suffixes, rewards = reward_model_ranker(prefixes, suffixes)
            else:
                prefixes, suffixes, rewards = concat_wo_ranker(prefixes, suffixes)
            for i in range(cur_bs):
                training_data[index + i]['prefix'] = prefixes[i]
                training_data[index + i]['suffix'] = suffixes[i]
                training_data[index + i]['reward'] = rewards[i]
    else:
        for l in training_data:
            l['reward'] = [1.0] * len(l['suffix'])

    # The first (highest-ranked) candidate is used as the SFT target.
    for l in training_data:
        l['sft_index'] = 0

    return training_data


if __name__ == '__main__':
    root_dir = os.path.join('..', '..', 'data', "preprocessed_data", "summarize_from_feedback", "comparisons")
    # Read as a global inside extract_train_data.
    data_aug = False
    os.makedirs(os.path.join('..', '..', 'data', 'summarize_test'), exist_ok=True)
    random.seed(42)
    test_data = extract_train_data(root_dir=os.path.join(root_dir, "test.json"), if_score=True, if_rerank=False, split='test')
    random.shuffle(test_data)
    with open(os.path.join('..', '..', 'data', 'summarize_test', 'test.json'), 'w', encoding='utf-8') as f:
        for sample in test_data:
            f.write(json.dumps(sample, ensure_ascii=False) + '\n')
# Train PRO on the summarization task.
# Usage: ./train_summarize.sh <run_id> <data_dir_under_../data> <ranking_len>
export OMP_NUM_THREADS=16
root_dir=..

#stage 23
id=$1
data_path=$2
ranking_len=$3
# Quote expansions so ids/paths containing spaces don't split into
# multiple words or break the redirection target.
mkdir -p "$root_dir/logs/$id/$ranking_len"
accelerate launch --num_processes 7 --config_file ds_config.yaml main.py \
    --task summarize \
    --train_file_path "$root_dir/data/${data_path}" \
    --validation_file_path "$root_dir/data/summarize_dev" \
    --validation_file_name sampled_dev.json \
    --output_dir "$root_dir/checkpoints/index_$id/stage_$ranking_len" \
    --log_path "$root_dir/logs/$id/$ranking_len" \
    --index "$id" \
    --seed 42 \
    --temperature 1 \
    --sft_weight 0.05 \
    --num_train_epochs 2 \
    --training_stage_num "$ranking_len" \
    --block_size 720 \
    --learning_rate 5e-6 \
    --per_device_train_batch_size 2 \
    --per_device_eval_batch_size 28 \
    --model_name_or_path decapoda-research/llama-7b-hf \
    --do_train \
    --do_validation > "$root_dir/logs/$id/$ranking_len/train_detail.log" 2>&1
class Summarize_DataManager():
    """Tokenization, collation and decoding helpers for the summarize task.

    Mirrors HH_DataManager but with summarize-specific stop sequences
    ("\n\n") and right-side truncation of prefixes.
    """

    def __init__(self, config, training_stage, tokenizer_path=args.model_name_or_path):
        self.config = config
        if self.config.architectures[0].lower() == "llamaforcausallm":
            self.tokenizer = LlamaTokenizer.from_pretrained(tokenizer_path, use_fast=False)
            # FIX(review): the committed code assigned empty strings here, which
            # would make eos/pad/unk all "" and corrupt tokenization. The
            # angle-bracket tokens below are LLaMA's conventional special tokens;
            # the empty strings look like markup stripped in transit — confirm
            # against the HH_DataManager original.
            self.tokenizer.unk_token = "<unk>"
            self.tokenizer.bos_token = "<s>"
            self.tokenizer.eos_token = "</s>"
        else:
            self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, use_fast=False)

        # No dedicated pad token: reuse EOS for padding.
        self.tokenizer.pad_token = self.tokenizer.eos_token
        self.padding = True
        self.max_length = args.block_size
        self.pad_to_multiple_of = 8
        self.return_tensors = "pt"
        self.add_special_tokens = True
        self.training_stage = training_stage
        # Generation is cut at the first blank line (end of a summary).
        self.stop_sequences = ["\n\n"]

    def batch_decode(self, model_output):
        # model_output: [batch, seq_len] token ids from model.generate.
        return self.tokenizer.batch_decode(model_output, skip_special_tokens=True)

    def early_truncation(self, text):
        """Return `text` cut at the first occurrence of any stop sequence."""
        for stop in self.stop_sequences:
            stop_ix = text.find(stop)
            if stop_ix >= 0:
                text = text[:stop_ix].strip()
        return text.strip()
self.add_special_tokens, + )['input_ids'] + ps_lens = [len(p_input_ids)-1 for p_input_ids in ps_input_ids] + + self.tokenizer.padding_side = "right" + self.tokenizer.truncation_side = "right" + + texts = [] + for p, s in zip(ps, ss): + texts.append(p + s) + + batch = self.tokenizer( + texts, + padding=self.padding, + max_length = self.max_length, + truncation = True, + add_special_tokens = self.add_special_tokens, + return_tensors = self.return_tensors, + ) + + seq_len = batch["attention_mask"].shape[1] + prefix_mask = [] + for p_len in ps_lens: + assert seq_len > p_len + prefix_mask.append( + [1 if i